Qwen2.5-32B-Instruct-q0f16-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
fa7a1ff verified
{
"metadata": {
"ParamSize": 451,
"ParamBytes": 65527752704.0,
"BitsPerParam": 12.329999342718688
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1557135360,
"records": [
{
"name": "lm_head.weight",
"shape": [
152064,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1557135360,
"byteOffset": 0
}
],
"md5sum": "8bf83241e43ac68721913d2dfa887427"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.62.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "3a2cd1521a89755d087c7ba2fa591a22"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.62.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "95b9a88dcea26230448ec154fd309afb"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.63.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "4fd13fa67b9d5f79cc6485ed3d28979a"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.63.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "c1c5ffbdad9f682c66f72cab3b2f9402"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.63.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "8ec877c0d41243d26b338f0562d4abb6"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.63.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "dbbe3ea33a7aa6d151f8bc502ad9b64f"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 1557135360,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
152064,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1557135360,
"byteOffset": 0
}
],
"md5sum": "cc0c1ba518eaa3a45514e23abe4299c8"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "750a88fb02cce3c9f9919e42baba12a6"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "da377e8667831dd98d0d0c22547bd75d"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "5bdf20548218be77a7cc28ded0da6a22"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "626a8765ebdfb72e7d3324dada211c73"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "38b458ac7408ee93886b6b2d082c4b40"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "26e1f1547c884f3d6170c7521d568d24"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ecb48889a46e0cd9705965dea8ca8b78"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "733627a24c80d698dea227bed0290726"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "a380a25fddf0adeee2453f41b1dc3984"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "bc8f376d52c6a428062337492099bdfa"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "6a1658a6cd3a731449eb5a906cc7fc5a"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "340f1c8e1a808db4cc39c006d912f217"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "94a8ed1f476a15cd98dcf7f4a772e2e5"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "0e687e6b0f756040ce7243a415e612cf"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "2a01718352968cdf4a68a9678c972aef"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "cac3fe21d05f4aa6aaf787987be8689e"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "1857b413f6bd10e9888a6026d8f1809b"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "15f10db579cac98acf81a3a2180726f2"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "c0ca940488253a2b8fa0d083623556a6"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "d4cbfcc5f41e2adc544406cb0ce4ab5e"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "3f6a60bec1f40d4c7244c28ddeabd043"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "7cff4b6593c52592af05e39cfba9ab82"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "94cb0a9a50dd7b458bbbea475ea99343"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "ca164b21bb804f2bb6d49875e99286a0"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "b7a5d09f3e0a9ce96e4a0435ce7df127"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "8145db65e2d1d6dad0a26f5d89b68bf4"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f3f5fdf5b130a7496a2aa2f2ed95ca93"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "74bf77a8b0b6bb746263bec57a06544f"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "b2a43d7b3a15e059fb78bc3ce78d1c57"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "38c28e49c3ab8e7c4796ca9d9f469166"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "2f324f045f7aa9d5ee601fc62aeed7e0"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "44191dad41262303d0b6e9d6553b35f2"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "660f71a6cb7b671ca561ae031780d3b5"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "8f0b35a54f22264278ceeabc5cc149f2"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "a8ae995a159da72c7a5126b5a73676ea"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "70e63f491cc05a5c088be46e30d7a237"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "43116a9325c85d861ee524cc54c3285d"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "be602943c36425031049441fdf367ce6"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "fefc52a122bb90b9e4160d4f21083282"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "44c326edafee467efbc3bcc081d152ea"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "cf327a42bf8ad722518ec1f5065bc144"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "815db9399848debf0289298735fd1e9a"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "3183e28451248be86a0503d29cac0ba8"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "0668b933d0df82b676db0207a6f0dfe8"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "72070d68b4457530588912b42136553e"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "54e25f7d5622098ec4816ccba2fa8a40"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "101194e3f29a6fa7fca3392ae71a6e99"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "d020377a2d58c9a5a88b0bb9d3d6725d"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "7d9305def91c159483fc90a50d8761e6"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "77312040fe72c0e759162f4da92d8b4e"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "69c033460bed6b238d95167a8a373a11"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "df321c1badefdf81f1ae8e021af33a6f"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "7d1dc778a6d0a1391dba239a3422d0fc"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "4e6914b150b6ede90e7d573f97d3fa88"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "14d32a094cb85b8d21d62eed56e9bc9f"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a5b2444cd06bc8370c1b2510955eb858"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "7631a2493f512c0866b1894af0f3f959"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "9a8d1a5b0ae17c691a3f47bd6dd577af"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "515b6d77a5a19fe03a83aaf496c9c837"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "0170e0410ea94109e16b9a91fee0003d"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "0fa558ffd19835d3a02133f782b9768e"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "efb9eef8ae6e3fc857f865f95314c503"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "fe19ba3901cb7af7ec8a07e2940338b2"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "4af2bc50b8cd4b5ba6f1e1df25eeb84d"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "480663ab7ea11557bdb28e9052df7f69"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "90b584b5015ee5243d5c352944695163"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "48f3874237b80f428f1195b500ee5435"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "b71f052deed39570d4139917a1d698a2"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e05bcc9e16b866eef42aa09105f3d271"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "a1dc616584480f814cbcc5da0b56224c"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "e305c78d267a1e20e2a2fa849f67c56b"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "9031feff0ea7fdb40cb727ba244f34bb"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "2ad41401f287b6a24ac57daed89fba87"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "4abe4f498c949dead7cdef9a9e87a22f"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "fc8c3e78b21d686dc7d939a2d08105e2"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c9c6e287473a7851b8e7684c2a115024"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "abcab40cbc31e0ed742745421e4b65b4"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "938d107f0afd73606bffb7f00f44ad25"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "d788d3d8ae166f0a619c88a3711a924c"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "62e1244d26b917c3c4d0ca004cba8e35"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "75c653de3e52fc99520ce3e29905f80e"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "d51ea58285691f611d78cd02ce26c0dd"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "ebd9332447c57517cfeac7593a958743"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "0bfab887063b835b606c70898032b220"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "6c210f559d703f4592ee1b437c292749"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "0b32060a374a6fd5cb27c4f0a1948ac2"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "bb0e294734767c2cd14b75b3cdefd5e2"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "3b4085746957cdaaadc0f9fcd0c8d815"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "a0e56966dae73cc347e3439fc7bd956b"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "878d62b08e4679074bd287d54132ce6f"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "6208900eca0ad97d0f973ca6b73a9467"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "7a10c872f2e245573a11f489328cb86a"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "617800a9792578ea58626cc97e007069"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "4edef1f89fae9c9a3a0fb4cf7d0ba9fa"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "4502cdd09003027c2f76d3bf754e9253"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "caf2c302f6c233b1d877ba3dac853b8c"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.24.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "aae1d1063e0f26fc31bcbc61ed964c7a"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "091048af5fc512f2a8b0cd5eddd330a5"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "92040f85317411dd7792387e734b0c87"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "35e7b9a0f2306d1eaa1690abb064936e"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.25.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "7f10c420b1401401082906bef611e27e"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "0e3e3499feab3e009c3c40551e507752"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "efe4cc48e7a69e52f9de75d80edd26d8"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "0e7d7675d9750fe2f1fb07b940c31c0d"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "c4537ef2899fef8356eb40619f9b5a51"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e3e98fa2cb8fcaffe09b196353963bb0"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "8c5cd2d2c7b6117b8724b108578dcd94"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.26.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "76c9392eecacd28d1c8a960859112f54"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.27.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "43dbfe44305d0a296bb6da74255f597b"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "104915a0f842b7a0419101832ad3cbc7"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "1f5a7b6d42cb544feea0db40fc284fc2"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b823fdd97ad9ef4996dd8ff96ab05cc5"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.28.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "8d643b15b5a2d4eba07b550da3ac3e5d"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "10eeae436effdf1384e3907950f9357d"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.28.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "36178a237622b0bcb58b75b770750df0"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e6e6df278a289f7b566641083311d4b5"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.29.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "f2ccdc26383173b03130d3ef526d708d"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "810b35df6bbad5fbdabc72e279cf821b"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.29.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "058cf853f014928a58df7997d9c4de5c"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "a511497ea7ad26022566ac65066068ce"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "9b147b97641c70ff6481b91e5c09a84f"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.30.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "cb91a194a947817eeac636075c36780e"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "5139e8c5c43b17ad3b205351fbdf9acc"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.30.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "945275a89392ae8586b07ab6ef243057"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.31.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "a2cc19e92cfa9a3461b468c9b5c43fd6"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "928e2983a51a2b9448d25f9bf372acc5"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.31.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "c314569027504f03f1f1bc7dfe2d9649"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "21973fbd3d55a15d0bec2c22187f89a8"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.32.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "f4a2f3ac11b800e1a785306d3d6e4c09"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "65701d1def8b234d7c5b70f9604d8a9d"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.32.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "bc8496f3e09dd875463a44893a6a0f12"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "aa0465b4e7d7e66d88da7c8d175a436b"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.33.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "019c54b71de6936d32c4c7630fa1c682"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "7405fef66e60745665619fdd88e394ff"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.33.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "76a6c24b88c4721f214e1ce572d22196"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1f800ecc0901116d0614f9785e8be745"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "d60681f257cac7ab4c119e31923e7d4e"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.34.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "605af38d1de46b0e6f3b7601cb9ce273"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "90ae0a995e3761eb8084fecf4fe9f8a6"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.34.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "1e4b20b8cf5281b2cec7470327559956"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.35.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "0628853f1b0f249d0c99ea245bf1b02d"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "c2a9f22338360c1b8d3daf50934e7c31"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.35.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "7a5dc9d5cfdc781b15f29bfc9ba24ba9"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "2fbfc985aa3ba0f23965098e997caaf4"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.36.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "55d190e07fb37f0fa37af4d3b8e9e921"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "785074fa2659756360cc9b31d9325977"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.36.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e3c64009020284a002ae4aaf50494750"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "055fa478d2dbf81cc3ced6a47ab20c73"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.37.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "effd16464de2908676f2b5483675ad65"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "aaebd803cf478d8ffb0cfab4964b5abe"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.37.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "5717c4b8e048b2676dc2d242dedc5aad"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "989fe41fdaea4b41c01c38dc3dd42c42"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "3f7faa7612d04b7a3c99f872fbee21d5"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.38.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "5b230f18df49601ec948e8ce38aac649"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "f90398d1e2b93007491236caccdc8b08"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.38.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "09995791efe429f4c2fc696c1a7c208a"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.39.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "ff13316a1e0a82466495b8e0c38f7e89"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "6b0658d94bab5354af835b29a7be59e0"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.39.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a0ec51171b6adfb34fe32052c82cf752"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "79d037f6768d3bd1cf8ffff43728f2f5"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.40.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "b163b3d7814b2d6ba81cd73808c6fdb8"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "1479e18636d33aca550146d789590365"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.40.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a081296116b4703ee22f6704d8a22b94"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "94ac620c9b351d28e790039f9da6bac5"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.41.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "fedd0c494d4f7923501fde38d321f7b4"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "070e4bffdd4cf4030b3e4c74d898439f"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.41.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ecf2e3620b5ca6f901fc3c0e68f50353"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.41.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "2929dc1e91c7454abc36a9bded55eb83"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "71ba5e7f388b629d3b63b0ec83725440"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.42.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "6e2da3a673f30ecc13ec8063703a4121"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.42.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "17d1f4ed8b26ebbcd67c84b49219b548"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.42.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "40eedf2a31713e0970e7aa65287dbbec"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.43.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "c4e991a2f65757fd6b0c45780167aa8b"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "6d9140953cd3ac0f934008101206011b"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.43.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "281d865d1179e91c1ca06dd1b4ca2894"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.43.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b21eff725f9513a5ccc6a70487d5ac4c"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.44.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "57b0baea5ce26a48354835856787b898"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "57195cba5383dd4bcc53d1e0014ffeb0"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.44.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "46653a6dc93a1c325482f6db7fce7e0b"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "5412751ecf265e2439fd6d66ba460a91"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.45.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "0d4bb0c1d5ccaba2783fac54270249b8"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "5eb1a2ccccd164c777e06d4e1e037dd6"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.45.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "31616fd48f0632d4fe4095502ffede4c"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.45.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "a37784bbbf4a722e567300699f41ae8e"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "90d812751ed32508df3da71cb7de7fda"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.46.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "8094a6ab919904ef2857deeb8798a40f"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.46.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "f834fddd452f55df81695c3c16b8d4b3"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.46.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "c12f184b20d60374af2a92d3deb7a23e"
},
{
"dataPath": "params_shard_196.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.47.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "e1891734ca2d5386f30fe717427ffed7"
},
{
"dataPath": "params_shard_197.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "68d37336d31bd5ea515f9ce58a5887ce"
},
{
"dataPath": "params_shard_198.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.47.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "0896fcd5530ca9510d41abec3e916da0"
},
{
"dataPath": "params_shard_199.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c2ce6f93f4f9cac65447f5e433ce18d7"
},
{
"dataPath": "params_shard_200.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.48.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "8febb38777fbfb56a590e968030fc3ff"
},
{
"dataPath": "params_shard_201.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.48.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "e0358ef4d4132b46ab62a064f22385e2"
},
{
"dataPath": "params_shard_202.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.48.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "2b2a9c34bc715c546a37fbec3d293362"
},
{
"dataPath": "params_shard_203.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.48.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "76d438f416ef5e64fb7888952fffceda"
},
{
"dataPath": "params_shard_204.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.49.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "5974263bffa5d9591e2fd27f1d80721f"
},
{
"dataPath": "params_shard_205.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.49.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "e81222993066d9194285e327de8d557f"
},
{
"dataPath": "params_shard_206.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.49.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f64b70baee4f1242b1c10f8789722096"
},
{
"dataPath": "params_shard_207.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.49.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "fbaf9f6e807fe5e57f223adc582c5e1e"
},
{
"dataPath": "params_shard_208.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.50.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "b6638d52b616d1eddfcc65a5a413f8c2"
},
{
"dataPath": "params_shard_209.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.50.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "aa456c76d66b524369c51e475bb66bf8"
},
{
"dataPath": "params_shard_210.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.50.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "dbcf0da5abc3287a42b2049d6392e1f0"
},
{
"dataPath": "params_shard_211.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.50.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "e068db487ec2561d789b907c6a32c940"
},
{
"dataPath": "params_shard_212.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.51.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "e1bc132bc1cfb27d0663c172b60587ee"
},
{
"dataPath": "params_shard_213.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.51.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "a468e0d867546666c63b47c308f67334"
},
{
"dataPath": "params_shard_214.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.51.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "19ca5fc8aa7b8aca37062cbaef76b638"
},
{
"dataPath": "params_shard_215.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.51.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "8ae104e8420e8cab019012183da340ee"
},
{
"dataPath": "params_shard_216.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.52.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "2b26e2aba7070769ea6ae1e85d2f4d21"
},
{
"dataPath": "params_shard_217.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.52.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "c1e3499e9551fd87b9e6a4d26d8acdea"
},
{
"dataPath": "params_shard_218.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.52.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "90159beff9d4d61950e4746e98dcfca0"
},
{
"dataPath": "params_shard_219.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.52.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "3d61f51306df5ef973d6b1497834099e"
},
{
"dataPath": "params_shard_220.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.53.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "19392c3121dee5cb8f41bfe993977ed8"
},
{
"dataPath": "params_shard_221.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.53.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "8fa22319dfd5b456ed6b70e7c723c49a"
},
{
"dataPath": "params_shard_222.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.53.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "35146444828dd5ec1c9eb71489d8f489"
},
{
"dataPath": "params_shard_223.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.53.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "aead69344acdf746bc2f7d97a4f42d18"
},
{
"dataPath": "params_shard_224.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.54.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "21d8863aeca009899dc1f0a9bfe2696c"
},
{
"dataPath": "params_shard_225.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.54.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a50faf95a3cdd5e0535918ef468071d5"
},
{
"dataPath": "params_shard_226.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.54.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1f27e7c3c3ea66f70e706308c6610196"
},
{
"dataPath": "params_shard_227.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.54.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "b94d091f52353c5873918d31454cb4c3"
},
{
"dataPath": "params_shard_228.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.55.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "147d7063643eb98cde306ff59142f107"
},
{
"dataPath": "params_shard_229.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.55.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "d2ba937faa11ec52a033c52418d58475"
},
{
"dataPath": "params_shard_230.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.55.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "c6915cae05677c408181d31c1bd1e622"
},
{
"dataPath": "params_shard_231.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.55.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "69975ec8dbf4b363dc3b196afe4ae688"
},
{
"dataPath": "params_shard_232.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.56.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "f11a350ff95430c19f6bb366e8b54122"
},
{
"dataPath": "params_shard_233.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.56.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "e58c72e8558392d36d5f2e4394204dcf"
},
{
"dataPath": "params_shard_234.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.56.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "7ae6dd8e728d0b008347828280d79d80"
},
{
"dataPath": "params_shard_235.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.56.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "3e9cee798bb637f1501c436ba4d483d9"
},
{
"dataPath": "params_shard_236.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.57.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "010357438083741ead9839651f725bd4"
},
{
"dataPath": "params_shard_237.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.57.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "f86d31fdec9d8f0476080099c8dda8ab"
},
{
"dataPath": "params_shard_238.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.57.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ce3067d42cf6bef83eaa23fa7d4f7f11"
},
{
"dataPath": "params_shard_239.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.57.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c5139bde0699b35bb588c8bad734c818"
},
{
"dataPath": "params_shard_240.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.58.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "db9c49a2d2ec3b9dc6eb4351ab99d3b0"
},
{
"dataPath": "params_shard_241.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.58.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "7aa97c791d016675d96efd04306068b0"
},
{
"dataPath": "params_shard_242.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.58.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "6b39a5a8f48d67ef306a70410ee26775"
},
{
"dataPath": "params_shard_243.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.58.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "3cbda3a8674f794875435ad2050a95f3"
},
{
"dataPath": "params_shard_244.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.59.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "9618378a815711e3d01f54bfad5d4322"
},
{
"dataPath": "params_shard_245.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.59.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "66170b368faf02df9019bcae03a32127"
},
{
"dataPath": "params_shard_246.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.59.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "c79efcbb69d9e5dd9d5843e0192e49b2"
},
{
"dataPath": "params_shard_247.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.59.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "284805d3144bffc50c487ef361fce9fa"
},
{
"dataPath": "params_shard_248.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.60.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "df23920619282523a0b183658e53fc4c"
},
{
"dataPath": "params_shard_249.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.60.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "1affbc29eb99c6e5b57bc4a18d09bbb4"
},
{
"dataPath": "params_shard_250.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.60.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "82be8662cd70bf3f22a6e4ebe001a1b5"
},
{
"dataPath": "params_shard_251.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.60.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "213bed5c3c1c75e5065104a3249ffd79"
},
{
"dataPath": "params_shard_252.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.61.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "6c9d7e2ee8b3fcc633559a1c71160802"
},
{
"dataPath": "params_shard_253.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.61.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "40ca240fbd828f636fe6361887f4f3a9"
},
{
"dataPath": "params_shard_254.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.61.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "356b79c9417873073adb840aa44c8b25"
},
{
"dataPath": "params_shard_255.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.61.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "9b86e7ddd2bf311993557233996e7a0e"
},
{
"dataPath": "params_shard_256.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.62.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "38d0b7fc31df42bfa81ca0af57b13e7b"
},
{
"dataPath": "params_shard_257.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.62.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "ee566b569ecac8bfe4ef1aa2ffc00857"
},
{
"dataPath": "params_shard_258.bin",
"format": "raw-shard",
"nbytes": 2238464,
"records": [
{
"name": "model.layers.63.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 0
},
{
"name": "model.layers.63.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 10240
},
{
"name": "model.layers.63.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 20480
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 34816
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 45056
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 55296
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 65536
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 79872
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 90112
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 100352
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 114688
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 124928
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 135168
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 149504
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 159744
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 169984
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 184320
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 194560
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 204800
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 219136
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 229376
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 239616
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 253952
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 264192
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 274432
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 288768
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 299008
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 309248
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 323584
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 333824
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 344064
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 358400
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 368640
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 378880
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 393216
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 403456
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 413696
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 428032
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 438272
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 448512
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 462848
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 473088
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 483328
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 497664
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 507904
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 518144
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 532480
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 542720
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 552960
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 567296
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 577536
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 587776
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 602112
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 612352
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 622592
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 636928
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 647168
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 657408
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 671744
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 681984
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 692224
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 706560
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 716800
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 727040
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 741376
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 751616
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 761856
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 776192
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 786432
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 796672
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 811008
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 821248
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 831488
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 845824
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 856064
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 866304
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 880640
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 890880
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 901120
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 915456
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 925696
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 935936
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 950272
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 960512
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 970752
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 985088
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 995328
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1005568
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1019904
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1030144
},
{
"name": "model.layers.28.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1040384
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1054720
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1064960
},
{
"name": "model.layers.29.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1075200
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1089536
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1099776
},
{
"name": "model.layers.30.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1110016
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1124352
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1134592
},
{
"name": "model.layers.31.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1144832
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1159168
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1169408
},
{
"name": "model.layers.32.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1179648
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1193984
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1204224
},
{
"name": "model.layers.33.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1214464
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1228800
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1239040
},
{
"name": "model.layers.34.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1249280
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1263616
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1273856
},
{
"name": "model.layers.35.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1284096
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1298432
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1308672
},
{
"name": "model.layers.36.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1318912
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1333248
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1343488
},
{
"name": "model.layers.37.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1353728
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1368064
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1378304
},
{
"name": "model.layers.38.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1388544
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1402880
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1413120
},
{
"name": "model.layers.39.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1423360
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1437696
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1447936
},
{
"name": "model.layers.40.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1458176
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1472512
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1482752
},
{
"name": "model.layers.41.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1492992
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1507328
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1517568
},
{
"name": "model.layers.42.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1527808
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1542144
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1552384
},
{
"name": "model.layers.43.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1562624
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1576960
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1587200
},
{
"name": "model.layers.44.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1597440
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1611776
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1622016
},
{
"name": "model.layers.45.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1632256
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1646592
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1656832
},
{
"name": "model.layers.46.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1667072
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1681408
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1691648
},
{
"name": "model.layers.47.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1701888
},
{
"name": "model.layers.48.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1716224
},
{
"name": "model.layers.48.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1726464
},
{
"name": "model.layers.48.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1736704
},
{
"name": "model.layers.49.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1751040
},
{
"name": "model.layers.49.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1761280
},
{
"name": "model.layers.49.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1771520
},
{
"name": "model.layers.50.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1785856
},
{
"name": "model.layers.50.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1796096
},
{
"name": "model.layers.50.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1806336
},
{
"name": "model.layers.51.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1820672
},
{
"name": "model.layers.51.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1830912
},
{
"name": "model.layers.51.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1841152
},
{
"name": "model.layers.52.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1855488
},
{
"name": "model.layers.52.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1865728
},
{
"name": "model.layers.52.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1875968
},
{
"name": "model.layers.53.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1890304
},
{
"name": "model.layers.53.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1900544
},
{
"name": "model.layers.53.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1910784
},
{
"name": "model.layers.54.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1925120
},
{
"name": "model.layers.54.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1935360
},
{
"name": "model.layers.54.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1945600
},
{
"name": "model.layers.55.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1959936
},
{
"name": "model.layers.55.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1970176
},
{
"name": "model.layers.55.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1980416
},
{
"name": "model.layers.56.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1994752
},
{
"name": "model.layers.56.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2004992
},
{
"name": "model.layers.56.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2015232
},
{
"name": "model.layers.57.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2029568
},
{
"name": "model.layers.57.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2039808
},
{
"name": "model.layers.57.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2050048
},
{
"name": "model.layers.58.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2064384
},
{
"name": "model.layers.58.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2074624
},
{
"name": "model.layers.58.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2084864
},
{
"name": "model.layers.59.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2099200
},
{
"name": "model.layers.59.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2109440
},
{
"name": "model.layers.59.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2119680
},
{
"name": "model.layers.60.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2134016
},
{
"name": "model.layers.60.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2144256
},
{
"name": "model.layers.60.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2154496
},
{
"name": "model.layers.61.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2168832
},
{
"name": "model.layers.61.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2179072
},
{
"name": "model.layers.61.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2189312
},
{
"name": "model.layers.62.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2203648
},
{
"name": "model.layers.62.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2213888
},
{
"name": "model.layers.62.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2224128
}
],
"md5sum": "a3d9affbabb167395433a90aaa743ac2"
}
]
}