{ "metadata": { "ParamSize": 254, "ParamBytes": 6171877376.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 622329856, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 151936, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 622329856, "byteOffset": 0 } ], "md5sum": "f5a393b8d9552a89cfca11fc481c720c" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "3d540cf5c2a2ab7213680dfd5e0ed5c6" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "361f7aa8156f8de998b7c1c9d5bd5b44" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "2d08c7387aef840ea287f19307e7e47b" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "68ec79d0607d550144ddf62b3b97375c" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29386752, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 4096 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8192 }, { "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 13312 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10499072 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18887680 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18891776 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18895872 }, { "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18900992 } ], "md5sum": "be1d43fb36dfcd1d840cdfa9a64e6504" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "55845a8df01f10bdbe20c22630c86bd0" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "0c3aa0df157dd77f7964f8b850b6cef8" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "756a3536f62270dced8069c7bb6887c3" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "e5c0162e746af3fd7cad0c0fc3561670" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "8b8daee7d14172301305932ad932ce1e" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "9dba88d01c9223b0db3bacdfe891cba1" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "5a208a69509e8269ebdf70084003622f" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "79d89ac47f3bad07ebb1d8224723eb16" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "2b91f85cc366d0b5dc4f54faf4e9bc86" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "5be766448b63c3e23c78615fa32f47ef" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "29920b1d0a1597968c159f0910c1333e" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "ca01d78b7ca8cd14bf111d442effafa1" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "fb7bb6dd3ebc588cb4b1a289c4c8783f" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "48eb23dd76b7d7625884ba3d22dd2e29" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "3265f74bd740a3d379c1beba3be1c301" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "0d8055315e319a625b41e089524004a9" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "21bc3c7e04f0a5958960748e3abd2043" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "fdbb2860750fd623c46338922d26bb99" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "7a64f2831f888838b56e94afa95e157b" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "b733a9aff8df2ea1183a7012cc2157a2" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "17f0b9c174f901e4f6b0782bc3576a76" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "7c398287c9e650b6e4425ee5f879e843" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "d80df83bd3abbaad494b4683e14c1af8" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "911be85da7cad89831e35d199384ca48" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "498854d30f912cf609e23b94793a49ed" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "8ea96b32450d8aa8d6550e026ce5768c" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "e3d9fc23b0a006e3e7a0493bafccba68" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "3b2c917b2d2ec4c67ffbb24435542764" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "8ce54448e11d50e8a1e4ad85b4e4fcbc" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "05dc8adc403fad378247a9b7d42b9483" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "5e189ac5c1971a80d1551a6b58b025df" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "cf17dac10f62814bcf7e0372f88c5c0b" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5566cf9480e904a8ba342139821f1d61" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "dbee37031e5074f467abfc580c0aa593" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "dff0fae26d5069ed2becc87fe944a7fd" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "0e015e559f95df6775bb50f225cff253" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "64c3faedd4c8a1803cfa825e2fd65306" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "cc1fa4af006e2bc42b6541a39ac16097" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "a42b5d97993cb7c161b2fc0ba52935af" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "8df062b02e81c931831f35ca311181a3" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "7627c959b574c2fa6af666e523453983" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "ca23489d548bb01dd5b71e1cd8bf26d9" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "4b1631cc9366d4391eee8ae046c847a8" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "f94e7f38b028d59098c2da3553198922" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "9c55a1ffd5079da4b28e87005c6c5b09" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ccb81925165a6b3792db857b0dd5f513" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "f9ced110c94a6c01589856e9aa9fbb5c" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "4afdf4c7edafd83e9f39d21258fd8ba3" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "bf3e2c92aed55b70193a7e072ca44423" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "b717f1178878b6a4dc39b7feeb498e2a" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 27281408, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27276288 } ], "md5sum": "b0063c03062c2e398d846b744ed6cfbc" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "e287a99cb9eb77da12b94a7846c5e973" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "82e3c9ec032752a2dccf34fced07f15a" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.28.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "1675a7024c1e55381fc4edfd09f753cd" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f41be16ad3c5df95d89ae0ce03d73fc1" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "4c6c01f318c1efb7db0c26f73e2b187a" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "4e4f72bf6c1e54461887736061f461df" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "2f62dbbea1d3d863c9187285ac371ef5" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "9edb92a2ad554aca529ec8498b9fd8b1" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5614ae3d2723d66fa8f16752d4418ba8" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "e5cc2ae2437366f1cfa87c4705a45a0c" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "a1deaf6c656c4356f3b9f009a8dfd9b2" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "7c8c05324c6b564e0a71e9a1e9ecfbdd" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "809efda0b47a07bce998f3c3119f6dc2" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "24b65732b756c3dabdec06dd9947f167" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "01d3577dffc5c3dc054c71930d111f63" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "42688c734ce901d899f6a390fa2bf77a" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "1ed8bf2bc28ad779f7017838474b86ba" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "ff0bfebb0ded1a73e4966a63e781a574" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "42973493bf448c27b25bb30eea7ddfcf" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.28.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "7e0e6d1c53944bf098e4ee3af3027da5" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "3ae79ca042b61e0ef058d72f76b0a1a6" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "cd5e45e71312882f65df34d95a6a7313" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "82648cad80e633408986e34866739509" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f06e275b8e7d5a2d571758eaa3225d47" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "a771f5f3b024af75ce664962278e389d" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 27297792, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8396800 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8400896 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8404992 }, { "name": "model.layers.29.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8410112 }, { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18895872 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27284480 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27288576 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27292672 } ], "md5sum": "0122d938098b616e5d69f6c52145abd9" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.31.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f72a16f5cfdf1cfdf68de32ddb1eb798" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "d1cc12a1b42ebf767dc7c22696f861bd" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.30.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.31.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "a13f1cae037b4ee8054843fe183645ac" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.32.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "adcd7cf7020b85f9c86499d30fe934a5" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "5876feb63dada54c1118feb9c74c2137" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.33.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5fed0f8818c16354f9a62093defa785e" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "711be6b5383eb8e9313725756910095b" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.32.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.32.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "42e90febe664713da9115d1cb4f6b77c" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.34.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "13aebf4637ace6020f16b1445ea93c7d" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "890cc1b14a7b7b7cb3fb164e009caa75" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.33.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.34.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "89441a1212527c15edaa3c5831868667" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.35.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "bf913875190550465485e861f02f475e" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "58b0420021e0cad377d493e2b839bfdf" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 27280384, "records": [ { "name": "model.layers.34.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.35.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.35.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 } ], "md5sum": "9d4274380d0c8f511ef6b04bd0c14051" } ] }