{ "metadata": { "ParamSize": 170, "ParamBytes": 17075361792.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1572864000, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 256000, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864000, "byteOffset": 0 } ], "md5sum": "60ae32290a73af4d897daca869ced218" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "bbe91c0e9a7cc6b67a2ac8e433ad38ca" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "d6084ff7e8369ae1a3004ce30a5a3a8b" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "180aa61e6f9c9532539c4257fbb58992" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "f4addb4fe0430f3cdb3d2319bb44ba62" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "b7691ec6a77ca8625c98f97bd1635b06" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "160e7c76a13c050aeec492a2e2d72aab" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b1d4c02924ac0bac0c95681ffe5d7163" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "c49146f24474b26a7832f3ab4a83d39a" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "f01062ca986324737db0f17aaf5b99f2" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "484e852cde8ad23c2b0d8e5cfa5b5738" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4a09c9ce10ad9282efe2ac2b82221647" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "96f11c66cf3c5ca2e4a8521761f98e23" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "340727bd83ea2235d0074ec50233e934" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "5c8c43c7fafd195e9d744b9ef2fe7217" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1777bcd65b8d2726f1003d2630ce47c9" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "9bb0bb3b2b4befa28a13d43b5c2ba0e8" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "d75e37069c46ecdd74b80f8cc06cb6d1" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f61a8e4f3be662e511bf8d2cfa546a74" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fd2ad1fb42458ff412cf6ab0eacd07a9" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "a4a0bec88d38218586cc0fecfe96f4a5" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "abd666001603d179c6fe9fef7ca52b81" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "40daa99215dbc17975c3167e09ad31e5" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "71be785d02736b5922a6cae0c8d1a831" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "fb2f730b5d66c9cdbc798358bef79ebe" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5165b9b2823cb5bd00219adede68d307" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "0f3ad4fb9d4b1c11625a92ab26b15565" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "8857080ba55c0ca02c1bfbd0204a05f5" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b00a53f899e45d7276e99354143f1c61" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c310bc2010941c90ab5501059839d251" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "fb0a228dbb72498a441e98248a05f195" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "1bca1e158bbac017220d40d0d261992d" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a35cff8ebbb396c6b83d2a2259dd72c4" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a6e50ab55df754b9034fa46797bacb4f" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "f0572dda1a68d882525bcf76be6ca777" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "291457429b0e015a4bcf8807b7fe55ed" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "df565ab47cc6f4afd0f77c7f05631b6c" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "978f92f008c4dd62822eef690eade5ea" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "f3d368b4cab5b1fc97723ec6a1821f77" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "898fa212a2a0c5d7dd6c6e8db2e1a71d" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "86fca1e31ddb2efa2ffd3876e106c10a" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "da402edecdd2e1a3c251ee0610a536e0" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "86a91af650313b154a76a83215b76df8" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "126bb57f096bf441e7f88808078630ad" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "87fbd15936e44077592e028134bc0376" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "00f38ed56e2d90c0227acd3acdcf0241" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bbad0dd8e2e23dda60e4e061454e3abd" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "09118299bdefbae7d50ffa91c5b224d1" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "053d8c972995b8d83e61744e6e314c7f" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "ca72202cff818ef742ac296bc2600926" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "3686942021d12436094f0d9b3ef7b837" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "8470ce2af9e62816b6f73efaa79bd720" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0c344d1f3c747dbad5db78df451ccf23" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "54b10013400455dd7cf82060c38bc4be" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "192a3d2fcf7d3c70d3dbc0a93737ca88" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "96d24ad55a29534d380ce2fd2e6c992c" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2bd0598739a7587684fceeeb39086c6a" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a1dd21a5a45c4d270e8df09a1b934817" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "29670e1879eaa9af540508e6b3a38f2e" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "4d0e8592ac518ce1aea4e0a1bae33cfd" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bd91da9cd9fd37fef6c0de05026c2034" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "12bc8d3ab3b5055f5f431c0971b91a78" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "520345638bf42df4cb55d8479ff770fe" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "ac934115b39eeb7519802192e1e103b9" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "63345fb1ae9f9b058b5883f41c33db14" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "dd8d733ee27a5e5dc6d41668d4d7cd6c" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f5654529a63557c8e4005374d6cf296d" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3f4abeb620ef7ec88e4b7849c5658975" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "bd9921891a8e96b69c9a169e0b19ad88" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "0c0627ce937b0edd3e98b0dc2b163c5a" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "6a6beccbe8824ac3abb1d1ce498ceb9c" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f50c090dd54c9b05b0057e1b7afcdfd1" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "f0725fb767165df3f963fce9f3db89d0" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "873b23b9460ae71eddf36a532499c553" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2e49ca76bb62f6fe4a00a5c8ee53dae7" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c5af0bf1da75e442102bb2ab9e2b5fdb" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "09ec40932166e5700f99f736b1576724" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "aa670b96411b8d184f4545b37acaa097" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "760fcb364bc239c2e77c39b96a9a7466" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "977188d5ec51682ebc6f673487072a99" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "90770a8cde2bea125b1cd9b51e333131" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "69db499e0e83a0aeb51c212848da685f" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3c3bd1e3cc7b60660caf1d364538073e" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "14e408e470361728f075ca6e96d812ad" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "91252eb5ac7e8ff526b6a6539373e5ee" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "385639a8f299bf0e1086cf111c79feca" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e25ade1d0ba5a8f660c1ff40aa876312" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "93afa47a95a2a34d6209880dfdcf2713" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "20d81e0b42f15f8cbe66b37be4ce7a62" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "d3f4a3ac0c3270e6a18d9c69cc8bbc19" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f41aa07eb326a16bd1d4f25ca61372f3" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6757f617b416676c1ac978082017a06f" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "1daf7b3195a362019245b4aaac97afa3" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "dd1649b386d47e14b297af0eef72d212" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e2c261a370277bf7345af16ae56ce3dd" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d0237722ad61da671a535e24681ac224" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "17032abea04fc43daf6864142a4fd24c" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2c661f393440ce8b8e3089da0b74df90" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "ae565af5cc648c6d29d43cf53253ea8b" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "2fb95a887ab3b66c052a264e3fe7711f" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "918eb03c29d4fccaa534f6396161eb69" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "9c2be4c01241aca2b838e678f6a4602e" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "5b05018af280f877ac02b185c02a8335" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b60134b8d6353407694018e6561c309e" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "1e725304f8124408742d3488dfd09f3b" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "ca7dd5e249a888b895aa3e19e01cf5ae" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3b58c08fd5f34ba34ffb26f67561fab7" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0236d2e8b4ca91382c411d3618a26be8" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 3072, 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "86a1cb3d42ed8c7cb9c91a923ed048ec" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 301989888, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 49152, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 301989888, "byteOffset": 0 } ], "md5sum": "7f26d02cec6df2d416595c0227777d67" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.weight", "shape": [ 12288, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "831436910bc5f74746081ba7de504bb9" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ab01465acddcdcbfc3f42866c1178acb" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 25516032, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 6144 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 12288 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25178112 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25184256 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25190400 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25196544 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25202688 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25208832 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25214976 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25221120 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25227264 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25233408 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25239552 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25245696 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25251840 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25257984 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25264128 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25270272 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25276416 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25282560 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25288704 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25294848 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25300992 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25307136 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25313280 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25319424 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25325568 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25331712 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25337856 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25344000 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25350144 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25356288 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25362432 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25368576 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25374720 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25380864 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25387008 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25393152 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25399296 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25405440 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25411584 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25417728 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25423872 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25430016 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25436160 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25442304 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25448448 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25454592 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25460736 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25466880 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25473024 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25479168 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25485312 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25491456 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25497600 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25503744 }, { "name": "model.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25509888 } ], "md5sum": "f3ad81e901d1821a5626ed9c99f5e4a1" } ] }