{ "metadata": { "ParamSize": 199, "ParamBytes": 15231233024.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1089994752, "records": [ { "name": "lm_head.weight", "shape": [ 152064, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1089994752, "byteOffset": 0 } ], "md5sum": "e38c2fe9673a12b9d3d4d0381acbb1af" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 1089994752, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 152064, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1089994752, "byteOffset": 0 } ], "md5sum": "90a2d01c34785d54a9907f1c971da007" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "d4be9a35b1d7ca125528f358aac235f0" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "3805ded574d61e11012d78b45294f144" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "8cf0d8799d20241b6045e730e7639089" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "ba99775a5ef00921bf7b69338a83f519" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "0cc3e8086f4382bcd41a6ecdb14711c8" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "15ef38704c5b49b561845c702d0bbc32" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "5e0a2ec1b2d97437cceefadc0a6f5294" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "ee8aa1d21b87f2bad1a8a0ee3647af37" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "dcb87dd1c0bb7402d69204c343f977cb" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "fac97b7807d9d2b78ceb8b3480ddf809" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "2314eed07d89b700f6b30b4bd7f42113" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "8fdfcd172d95006598eeb652e4f9371b" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "d4976327c08031515622c04491e4dda6" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "b05aa0bc19e6445ee8882b53821d6c3d" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "7d44b8908dd2835ff1a3250780dfb70d" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "32aba497f2ff1cd9fbe97eabe127b702" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "32d1bba52be68ed2bc06c22b00a2224e" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "cb78890b6fdd322d779d25b48740c556" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "404b50945d81e3118a6168b83e91455e" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "357da34a36fdf5bcebd305d4d2f335bf" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "8e47b62c9797cd277d30e612ea5cb5cf" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "09169be55a244e713a9054f3ccb9a120" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "d0dc164550de99309cb8e52711e07d48" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "0534165b8ee42a847ed549451748299f" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "eff0d6871c3fdafe58d7695d80bf5b35" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "3f1d41c576164ab6652ab8ab605f42ed" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "e55992d0d070bd1ecd35e0d342efc111" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "bf9138d78f2e03858eb7058473aaa805" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "e0b2ea0b957ecd4e8989e9b8aff5d605" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "e33251a8845c1a97619fa3a041ef22e1" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "b412e3d2d9d3713086dbdaacb056b519" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "e757bb777e587edc3f99e1981f055dfd" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "ce7546ee7d2599037a39c413e6fd4616" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "55f47c54426d55dde3a6b48fc8f06571" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "48170b5d613e47b4f9c982f1ecae138c" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "8f55a6173448340a22554a8cbe7ea0ed" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "5371cd356e8c8ca506e00d40334b9656" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "3a33614b4b719a8655b6abf3401d3140" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "d4ff3c15a44ddfda4d4d609267be7faf" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "36acc406d178c0e642f6c1dec47de072" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "34e431df8d9baa3aa2f52e9c76ecdd1e" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "90ad95d7d3c4da2dae4075710efac117" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "c25f27422d6d52bd4516898a8a83fc44" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "c2c4fc3f344a0e8877289c076c88f7d3" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "702847b3534c6d8f691c180d64be7374" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "e7f122d51b6d0dc668bf38304cbe451f" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "fdcd0dfde63bbcbf8d19ccb066c2ff1e" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "d64d2b5a3a61be6696e2e6b908dd84c7" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "c8e0393ca5baaf097a7db00f74b0750d" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "926b380fd470b4dd6ba99e41c0fc8a66" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "c02e4af23acddd3625d9789af86bbc0d" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "42c849ac061a8cb16c73070acbec0c11" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "f55f9cdcd742c7f368296ee9f025ca97" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "366bea9a7cfe580c9b8c80cb54bcbc5c" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "9a5668da66ebe743b735fc937ce920cc" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "4dfc529c31d77956bd6c58da5660aba9" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "21db0633d3a1343a6eb0621f53fc15ce" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "01b3254ff7e9c58c9f69f2162a487393" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "592cee6c09cbb333d1054f62d6ddc39f" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "3718b1a3b45609b7154e7f849988ff84" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "0594c6a502e9e8bf8ca3c839cd44092c" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "24104e2ba9c9bc985e43c93520711d19" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "df1be68f7ac13c361eb4dc1709faa7c6" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "b7933e4e8f3e253c828fc9578bbf6b64" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "6194fe8e15fa09a09f9b2fdaea2a2d45" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "5cebf750c2bf4e996a5e762ea10d0a41" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "92f7516cfeae46ba2991ec5bc232c902" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "6c72927cfe3740ca748307a09f403dbf" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "4fac06e41c3ed358f486c04683019c72" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "41fa6e943d3e1778857d8372cc8171b0" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "f1cb3c1a92df28315b1827efd63a18c2" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "d3e86213628bfe358288a88db92a1edb" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "f25a138b7ac6c8c8c80fb43d1a8ad930" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "af2662038aa23a9b67f4ba721cf3cf77" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "0a0819b8803e9bb4f106d1a9ce45ad65" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "955c2b6036119360cf4409cb39bf9c78" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "f188bdd14be7a4488f5a4db6c59966cb" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "23cd43b7303955df670cf19869963bea" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "657b3e2cd8943e992e76ee3fdb327221" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "5e75910020380669b7ca29a0b4792d6b" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "bd1095b77993d94c6ca0b16514a3edcd" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "68018fd84124d0c313af0485fd03e9c1" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "765e1afea4545c0ade127b2c5a4874c9" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "33467646b2bebe4168f190d2df80687e" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "317db552805b1589b869e4fd3d01ecb5" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "013a4cc0fe7fd5594b73fc934644f0f3" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "7d0081e68abe72c5c9b041358c351542" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 33548288, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 7168 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14336 }, { "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 23552 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33053696 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33060864 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33068032 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33077248 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33084416 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33091584 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33100800 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33107968 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33115136 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33124352 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33131520 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33138688 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33147904 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33155072 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33162240 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33171456 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33178624 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33185792 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33195008 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33202176 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33209344 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33218560 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33227776 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33234944 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33242112 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33251328 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33258496 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33265664 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33274880 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33282048 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33289216 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33298432 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33305600 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33312768 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33321984 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33329152 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33336320 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33345536 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33352704 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33359872 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33369088 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33376256 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33383424 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33392640 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33399808 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33406976 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33416192 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33425408 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33432576 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33439744 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33446912 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33454080 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33463296 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33470464 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33477632 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33484800 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33491968 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33501184 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33508352 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33515520 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33524736 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33531904 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33539072 } ], "md5sum": "3cfbd57db519715d82ee184f3d700767" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "09ee85d33af4a295efc64071b1752f6d" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "708b68f9ebc6df0afa1ef7440721b370" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "20ea1ea2a6a1a2f5777d2d501838ab2f" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "10ca2da79e16c45f11bbcea6f4b7280a" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "469f737a1f1a8bd582cf3adb78b338cb" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "a52fddadbfdb0365f207d3ac36fc2d85" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "db46c4bceb75d6d47c910743c0605899" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "4bbaff7442964fec09eaea586fadab0f" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "9ad8a893a4879b8661bce56f22af7f57" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "69c79dab13683c7e179d21739db34b12" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "432a9a0e7db6f8f15159b6728ee9a2af" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "35ddf19b172084242860a6f5bf86e9da" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "25a9d9d79b5368b97c1504d4dbc4c642" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "e668b6c33c6c85b1a7c3bb2ff836e364" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "02b4d74279c5b7b9f839ac26d0d2dd91" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "75f996553c5e4011f37e942dc7447478" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "926c5b1a9ed6d35839ad57d8ea23f195" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "80435d29c9caebc781068596e669e3c6" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "0291d9c3bbf76ff5c3b9d51c66d7d26c" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "156814f215bd07b352065e0733b0b132" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "113f76319a33a31b972d8b8747ea38ef" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "80c983d430e9bdc41852cf35d6947ee2" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "f1e9a914a0589abff3842ef96bebe442" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 33178624, "records": [ { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 7168 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14336 }, { "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 23552 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33053696 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33060864 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33068032 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33077248 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33084416 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33091584 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33100800 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33107968 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33115136 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33124352 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33131520 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33138688 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33147904 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33155072 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33162240 }, { "name": "model.norm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33171456 } ], "md5sum": "fdf81de51b639e8c3383fe8e2c5e80f4" } ] }