prune-llama-test / ndarray-cache.json
so298's picture
Upload folder using huggingface_hub
1f3aaca verified
{
"metadata": {
"ParamSize": 123,
"ParamBytes": 10825834496.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1050673152,
"records": [
{
"name": "lm_head.weight",
"shape": [
128256,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1050673152,
"byteOffset": 0
}
],
"md5sum": "a6945d49d5d076ecd0be132b67ce3cc2"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 1050673152,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
128256,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1050673152,
"byteOffset": 0
}
],
"md5sum": "f2d28da53eec7a4bfb2c19fb349ee23d"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "e937b2e11737a73c8433773410127217"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "9820d4566fb22a3df1144dd5a69ec2a9"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.0.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "27453451f3a7644c0b0d69c120713259"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8453d2213e40e4fe25059741bc038f22"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "2e42464286ec5527bb482582516025af"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "25c7237d9dd39b8cf9dc22dd74e89858"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.1.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "4da37612aa2935e67762390e54ae10d2"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5918a3f86ffdc42da7fa41dc9ceb3f5c"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "c46f0443f9a29232b648b39c87096c64"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "8feaa56e620d97c12acc424549c7fd72"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.2.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "2e455d7fba4daa0bd2a79271b2a7377a"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ec227f02386055d2ace2632fe0d81b40"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "47f36aa219dc4c49ed6bd52158e2b2f7"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "9df38297188d2a42171e222ae587c1b7"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.3.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "1d2d346bb6d1bc723bfafb605ba8e09a"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9cd24569bfc4a61d1764f8492740adc8"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "9cf43c77adbcbcb982f9a216fd3b61a6"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "58fd08562dd46f8a059806516f766e97"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.4.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "61bb85766246c8611167e3a1fd8dea43"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e75f953fbf0b15ad94f81d7e518c6ef7"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "21496883da67d545566000775c70c9ec"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "6b3123c5b1aa53e2fcff15a645fbf218"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.5.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5b8275815efb016b68621153230c10a8"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "79e3ba10352141535f272d1352b135ac"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "5090361f5a224c97673ad73425805ddd"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "728425b4ea63dc12526d8054e57b152f"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.6.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "d8d0842f3180c9ca701b281a75c837d1"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8fef16ecf6532754745eb9a0aa721681"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "17a2f7286a3e2205d22c0c38b849016c"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "4ee8be083d815e383e94accdd7b676f0"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.7.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "ff96989d45e2ce4d592d7f2faa249b30"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6551b7aea7c2bdb9f20a33b8834381c8"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "644da24b38c91be32847e179accfe569"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "c78179872b7154b5537add5137be29cb"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.8.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "62445893cda6f55efd335139dd8eadf2"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4ac6511a6fe5a418a26de295bceb19f3"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "b933c0852d0cfb4c3658927fd80f95d1"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "2a181db301d6d2c8b7ed206b7edbd81c"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.10.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "4aac1045d80691db6eaf51cf34ba3e04"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0675836793b33db17b8b1cb97e371e9c"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "667597c1dd1fdbef273584f5667f1f7f"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "02164c18545c96b8fa943d3a9f0a4fe5"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.11.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "6fa8ce42feedccb6a5c91f40bdccc3c6"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "daa6d8d3d717ddbbc40d430323e37d3d"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "98326f483ed9ec944c73e7ae8726aef7"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "535b6fcfdcdd18fffd1aeea3938230a4"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.12.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f2958338b7e2f9a6482753f7cb98f5eb"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e633ccf8732c712ee093f8920e609f9b"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "a0e2a0c72a558c74b324cca7a9110e9e"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "49b9371a3c79460d5a99f53515735e32"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.13.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "731d92900483226505602be53bb73b4a"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9f825f0c09ed9b0c300a7fb485331503"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "3e4347288308d05a790fe02c901f795e"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "ca5151be3fc277317e64fe2521d22d9a"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.14.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "2e2151f16cf9dcf4b4fe736b78708879"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5e81322a1338996066675e2cd8eec9bd"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "623eff2bf8cb5544d7e76f390fd6c498"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "603143f55b7cece63f204d7db165583a"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.15.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "0fb8cb3f987dcd985c55e7a32951e824"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b5dd2420d9e43ecfd1e5870fee46bc02"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "e54c4b303eb9450da6e8d0f260a3d432"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "99dd190e30d50bae591a456b5856ed63"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.16.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "695ac0ae0f2d58602ccd0d29f664c3c2"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6b745f2ce4086dd034d19b112ac1ded4"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "6acc1ec8e97f1433e6b705c7bfae90c6"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "67d57035608ae2ce4438784d8d677826"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.17.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5b98eb8ad81f9eab3ec4323bd10e03d8"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "dbb6a2e8d5c324712083fcd936252b16"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "f7cb954cd06dba30faecccdb99f15ba8"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "a1fcdd887e2603ae1084ebd00565ee9c"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.18.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "d65a1d27568ae023351d27641db5da25"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ee3f4f996a7799a11fc1dc691bea217e"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "64ab027aad105cfe2a92a07e39a89818"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "c2eed27ce53331199b18b0bc9ccc2615"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.19.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "ef2454eea2929d54578e357822aeaf9d"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d1ac8f90a27359fcc7a545adbc0f52c3"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "d33a7619353629a3ee4ead844ae58552"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "233ead72e7fceda74a9fc1e881ebd92d"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.9.self_attn.wqkv_pack.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "bdccad7d751c0580a8132880c098f45e"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fe682c84680eca02f330bc6a7c03d240"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 335872,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8192
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16384
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24576
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32768
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 40960
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 49152
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 57344
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 65536
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 73728
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 81920
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 90112
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 98304
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 106496
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 114688
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 122880
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 131072
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 139264
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 147456
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 155648
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 163840
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 172032
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 180224
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 188416
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 196608
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 204800
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 212992
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 221184
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 229376
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 237568
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 245760
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 253952
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 262144
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 270336
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 278528
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 286720
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 294912
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 303104
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 311296
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 319488
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 327680
}
],
"md5sum": "95e75830965326aea19e2926c2dd6d31"
}
]
}