diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,8545 @@ +{ + "metadata": { + "ParamSize": 563, + "ParamBytes": 145412407296.0, + "BitsPerParam": 14.781631589720977 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 2491416576, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 152064, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2491416576, + "byteOffset": 0 + } + ], + "md5sum": "22f8336c75be056a785ffca206b5fe0a" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "d23c544712be76ca49bcc5c377336461" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "3c5ebcba065699128b52e6327efa4f57" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 2491416576, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 152064, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2491416576, + "byteOffset": 0 + } + ], + "md5sum": "b8e9478427df71702f5181a711e7ebc4" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "88e21ed9f7b250e696c43da3a9b75a2e" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "6bae99224b4cfcfd1c503245e275bd3d" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "8f47c04a416bef75648cf660fce96971" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "bc99fd7b82dfd7782a26c20babbaf61a" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "bbf0c1b5cb74e674cdfdc8d67df76a1e" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "ac991d1949ec07ddf4a0fa0c0d5bbde7" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "392867f5794881bf47b5bd42c2b018e0" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "d8b010ec9795917b77f48d1701acf215" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "92ee0db30c6850a1a023ddc178d7eb40" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "a4ae9579569aa84a6d80beca956e29dc" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "e1d66e0eb145d27bed3c9867ebb6d09d" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "06cb052df70b4406d7e4c3d2c8c33e3d" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "c8692658eef0ea06ce337f88f211fc3d" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "8fc13d3071301ac19769527ca83601d6" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "a4cfba3b4aa0e03888f04f1e5048f6de" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "668cd6f78ff889cf7d8b018e621496e8" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "764690f0e149e5a873393c9272be9e82" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "34ae9699fdbc652fc0b75efe2b5d6220" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "1971b4d185ca8bf11e7c55713ae9782d" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "fe1d1f102fb2c2f0c6c09b62b8e00b83" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "5a4b0d96764750f01cb7acfe069bb909" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "ff2b41c0e7211fb8b1e333c5f5e8e179" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "ffa1cf7a0b8ac3074ea5b44ee6dc4080" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "9e727681c2e5aec11fbd95b48ba8cb1e" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "906b26f1992d5fea73cf10dc3820a108" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "98407ca11c79187354ae3d019ba81713" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "6d032f86074473c4b4fcca18d93865f0" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "2d5b6604a084dd6d97e21d36ac268e89" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "2ffe76af54224458ec0779b7d70379c8" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "874c5f4cd3eaa2be7540f40496188092" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "d198d9531ec04444f77706e7cec4a4e6" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "b5e7847c034b7cf0459f465f144395f1" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "64286915c459acaaae21b7d4396bc1c2" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "f806b9c02d27f07dc73cd31c57b9d687" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "7c61f2672ef7e09a8a93a857c77283c1" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "00b68286d8776ada36d06fdf965e24ac" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "22bbe34ffb8d82e8be5fe1b852d15ce8" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "acd0fe2f1cea00d73ae4e0e641f8b0a8" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "335c2e01ee18576ee40a2d082ac3c30c" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "71b24425a4cf52a3f397879173534158" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "57ea51c055c7bf35f2a941ed0f85e8b3" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "dbdee275e593414e4d1f5356f9f44aa3" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "3c816c5022309d07b9b1ac73a75a3fa4" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "96e56482348aa61270d60e7ba97415ac" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "b2072a0ccf503eeeffe1a1e16eab8d0c" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "34d7b8f7c7a6c3481cbbded02e676790" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "855b4de1c09ee5426cbe23a41d49405a" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c1f5b8a671f0708c0e139de1e2bbd284" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "937e0a899d315cbb39c9dfa48f124823" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "2a933a8cc150c3f74a86d66406e57c71" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "d24930cc26a18a4550970587bd6bf3a8" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "412d421eddae1f6437f7554c865ce542" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "ffccded2245004dbb26353fd9933c504" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "c5c658a1cb10fbaf623cf5ab8b3ca34c" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "5ba34865f5200fa8769439270714ee40" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "addc768d47263cda14b022f886746878" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "d4eedb8e85ab6d7372312e1c7f3c138e" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "7f0674bdda40ec748589d1debc09e3cb" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "c26d16c264cc895136944bf90369b6d3" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "41a4424286d1df593dab1c1b991c98dd" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "a63e751cfd4af28c99558384d07024af" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "1a676b91fc60faa12e2343416218809d" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "4fe3ce77d7540f8cd48ac166ce9d2e6d" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "885406eadfb7a70a597b6c0010ef85dd" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "93e40ffd78afd1beab958e5938f353b7" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "65c4d3cf21215f3f7ac686e1241609da" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "68fd59aa65e936fa43825ae244c86870" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "7e04ef5710b97931c2677f7b53b68794" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "37dd748a0282a7d691f4eaba5aa3473c" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "a5d07f03b8390eede9afdc9473feb73b" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "09e3627f9f61603a4c4af591fa007cf6" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "ed037e525d88f5ba02b30f944ef73214" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "5900f4253efb26c032a6d075327a3968" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "fe11c123535250f460c9b06d62504a14" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "2a69b017c1aed836da87b36773dcc226" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "53b6ee78169f3fcf77254bd4bf0a1707" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "9dfa9c366ebe3c01d86d3f9c49cae757" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "9c0f6cc3035089e13bb56799dc93e8e3" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "edc92fc3a6bcd037d71dc92ce72fd68f" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "37654343c45f6b14a2e7baa8935894ef" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "cf5648b96722966df15aea482cd2920d" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "77f65e7570c6d19994b027ec99064d5f" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "217658f11e1fbda1523f168442aadeda" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "077706732dbff3988e096b61d426b730" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "20ffe00a63f901c65bedf32e877c405c" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "a5b435ffb109f0e232a5d908ad7cde5b" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "c7208aa391f6aad1836d1390eb0ba5a2" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "7e200a7f9499351ea9171114153da057" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "42ad8dc59dca560f582f864362210a6b" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "95a7153465cb0ea0d0c37186fd105651" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "cce5a120060082792ef3264bf1694993" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "2ef2b9897a569ae252f4264b8d9d3c95" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "3b093478af1bdb3c33563c4cb4e65d1d" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "008477cf24b0d5394935202c307f5e88" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "b150061d70df2dbfbf46f706a1e64693" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "5f6e11570228c899c038be0cb9a5eeaa" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "8e66ae7a94a19111989c74a283192119" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "2ee075e3c8c53156c2518290dfe88673" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "f2f24470dd190b109bd24d0375c70f9c" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "97f08a8e3af46549b459d5ccd967f48d" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "93e2a7a68782e092bfba3e573391c4e2" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "f1f4165b3d37db87af9419d74b6837cf" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "a8ddd9da2bd52788da303e3fad8c955e" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "5cded158968a69ac93e16da0361434e7" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "7d783cfb38cc29d553cecf207d9fe512" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "95d68fa483dc266cbb1ab7f47dfc5dcf" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "7f32a3db253ecb7448285a17bfd6e358" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "e429fae76821612cca7d99cf4f6343d4" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "b1a6bdc0db2fc93e05d17666868544b9" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "3bf28c7a972b451a7f0302d4bbaa11c5" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "d592193e037de6507d7a197fd94a98b3" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "1e3a8544591d89a6c128ec4df89faeb1" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "1735e1ca9b39361b4ac4b3974e8b87ab" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "f3d3023278de28ae5666026d0748443e" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "a90d2ddc52f625010cc5ffa3a543f09a" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "29fa34378965e81784de0a2245259296" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "ceac0ba65ec122f4408575059667c5c9" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "35cf707867bb54c4bcbadbb16be838e8" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "021310e071148d002aaadcbf758f39e9" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "180ead8e8efe2db8299dfd3d4f6c3324" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "b870174755677b1d3b072b6aa6c694ba" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "614a16ff1cb0db36301c7572d29b4084" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "336c46e87b961bb677515321d8fd0086" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "1a9c948e57158df2e62c4e2752fa0a86" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "59046f1f1b8566742f97c92e3646bacb" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "7cbd3c9d6abad81f8f2da1ffc84f916a" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "02b201a21d8430e8f0790adcb2c1120d" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "af1f8e7783da26081401876bd343ae1e" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "adecc794517696717175eea97b900ea2" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "94a431cac6e6b12cc3110424e16650ec" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "6821a4f5691807eb60be8abf55cf10ca" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "2c88f51d83a66139b90b387970c23adf" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "092636200300a9c5bbc8251abddd5f3d" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "41c95d98c2c1c8cd26f7269af820716b" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "445eb24a0c00d312c121b5d09ff7a888" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "b41e239573a2fa93cbfcbeff8c50057e" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "c074cf8367f2d3eb0be13c1f0f2fc5df" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "479dafa00d9d73440cddc6d8970a86c1" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "99736ff628b2210796075c9c0cf90935" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "14b1d7c7428b5c7bad5d36e360280a98" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "bf04db62ee9b1dab8eb499c23992ef23" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "66fd43cf1be46574b9236c2d146792dd" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "cd770c592ed2c1ec23f99185c952cc3c" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "6e67198751e50f93c036e74d9df8e1de" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "3dffd2b38be12289b86b16aace64dc10" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "c24589a92aad5f4686d566a16ab263ff" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "e80275933a417985d2a6bed3abfbb9a7" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "870392fb532b063aa252904067c045b3" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "311ad96e9b5c31d96392d09373d6e696" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "73a4b1b74d1a4dc97b1dab9f0e854f3a" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "02aa926aaa7328b6e776e312cca3b745" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "d355746885630ce8c8e2de53acc35f4a" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "7f154c094ebae5ba62b553f64da110b1" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "b570e46286cdf896a0f6521a4887b51f" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "3a2d3ac451e60aaedc2223c2470fd73c" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "891bd3823011943f51f406d182092b08" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "c450827af89659420a0015c040021ae2" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "be49aafb405a1da6b60e8c384a43998d" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "c823f274e012f6bef77e0a2443c6cfff" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "a9092934fe65c955cc688b1408c95e24" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "d09def4035a9b2271b5839d1529b7ded" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "5ab290d00d62125eeb9460a2fdb76d9a" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "404469faef9b82094599fe44460f5f42" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "90a0d832f3ab5efe48680b72dcbd49ba" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "548fdb14266450c681339a4c5f16f440" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "e5a5f50fb5eee724b52591eeda43913c" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "0dfc0b0244a7fb3fa3908d67ebcd2f44" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "80ef52ec608e2fbcffbad20d78602499" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "889b71a6caa16bee3403f0968f58a5f0" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "42598eb4d4d7981b7a60c7e966051e8e" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "45089fa9f27db640bf67ed9bf65bb352" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "fb70f90f76b7c0644677b9e406646a87" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "7bcd7e41bfac34e58ed22db49d4b513f" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "b5da23cf13e8200fc920ea0a172f1a6d" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "4674f11c2ee4ba28d41f55c1ddee1a0f" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "6a7f00e2608e437f03ccdf264eb52200" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "0f6c1985dbc19407413f7733e1026520" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "db9c1f2d2b243ae1aaa472187d40d80b" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "091112e740cb15fa42bd82b6052d963f" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "93a1764c50205c6ad11d416777a8daf8" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "0870b331e0bf449ff5354bcf8bd5d6bd" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "b823841cac65ca4b14496d447d3fd722" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c92c628f0ae106beb1b0e19e455a63e7" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "cdfb5bf9b9c8b8706eaf6ca698ef3bc5" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "efc85b6e900b89a55e3f3e172ffc0f47" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "9ef676ef6e1a281a63d747442f619713" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "fb5a6ae9576465c3bdcb2b4ef2f625c9" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "1430968297b284d1741dcf988decde95" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "5282cefc616dfd32f400c6e9df5daf3a" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "6c556bb5177d4bda9d4a948c768bec3d" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "cd716008098a495e72ba11422b0ecb7c" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "f5db32a5040beadf438963348ae99b76" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "b48e198935873d8038becf4b81ffb436" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "cd86eee3768af0379a82162d00e911d2" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "74578969c77002c4160bb2ac7334356a" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "a7b9cb68caf0b48afe5c0341d5039b97" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "1e3fddd40a394ea2a843d45254051947" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "fa756caf18c72256491579ffaaef9744" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "4039427fd686109a8589ca8d67e09e42" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "8f6acebf0f5e9705c39a1482ffacaea8" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "a99a634c620d6cb150423a6a039d7f7a" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "fda5bef7a20aaef019d29a0ec9f991bc" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "393cea0d1c5821159c0f2c7b5ec45b95" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "f13f69c4f779710632056c9efbc0aee0" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "e25ec5ea3d3bf411d2f4067fa3355b8d" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "aa3b9ebabbb60efcd71d0d73d24e2cd5" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "88e00bb512b3736e4928c808dd306727" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "72cda2317fa179e9b41fbaa59ef7f8b6" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "67b2c1fb96cf7d776dcba8f185e5742b" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "24fadfc5b6a3fbd71bb194e999ec899a" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "6513dd20e3e13c4ae05572a7995af140" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "4b56a4b2fed3496720a85bc5006b33e3" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "99870ff016822e0d4f9eeff22db007db" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "d926ea44206262b4aa82f5bc8aa38858" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "92c942315fa3c8bf0f9bb3ecbd839839" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "5cdcc929b6c5f6db3a7eabfc746e5434" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "61719a8ea89296b5051d09df7925ff55" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "026274365e9e62a9faba22f39e6add3c" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "112074aca7f82670f663a5e09f7d85b0" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "7de262b922bd2751409c9e73eb17451a" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "52605ecd9210054f0d5012e4ad84ad20" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "b7fd8f4bd69bb5995bf6dfb3731fbdb4" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "7626ac75b2214b9345954f49ab90cd72" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "f3d1795a777dbed93d3a1823787ca23c" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "babe7a0d120456c82299f2996c242a8c" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "995b939609a6949e1c1f7862626dfc60" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "662f5ff1b21f6f2adf326c108d0da6c8" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "514f7641ff1ec668590a10d622830c35" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "b97d1be2fbe06e9d2b38cfff35fee874" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c79ad4d77cf3b3f435f613a8069e59cc" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "76e84a6667b60d23220c8f14b3e81c37" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "24672c87c35f5662eb6d78bcb5a4d2f0" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "7ab6a9736e5ef7ab55eafe711147d9b6" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "7a05480bc3407fb7ea8aac463afb99ed" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "982542c811bd7d52ff845b9f06bce3d6" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "3861e3143ce6739f5cc035beb27b2ee6" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "be23e1ff1f2bbe6fec33b5b1d7718d23" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "287299df142aac324c1c56a24986b4b4" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "bbbb59ba3a8c1028920af9db69e9f80d" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "6a076502bc29c022e5e4577c01b93f56" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "6019c7e3bc75b2702cc85814a3af2f99" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "4f02c71da6131b703f428ffbcda414c1" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "4985cd2a0eaa060665811031872684b3" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "be0d1c2adfbd5b9c1469063337aa9eb3" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "6f8905fb9ab944ccbb9b2294ca6d28e3" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "3428d76f625c11ca6626104f255664e1" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "75d5dc2c449a13af7b8b148545127c47" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "1deae93209e9227fa41e7965af28b031" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "c396a5f706a2ace4d325768c72a757d9" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "f4eb2456ee1dd990102f58a9b6e1c61e" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.64.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "19de3d402174ef6c470780f0c07d1627" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "04750adb1c6b4fe12239434c594f5d7c" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "3978d6dce8a59610c46e8bd4abeb1c7d" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "19933ede0486c325b548ee001a7f2392" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.65.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "003d517357ffd2227817f0a0b72c30ec" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "7d6a8de9988cd5c3a6b8a053561a0655" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "f1f36cf4b82b7e4fe9b01d9917a91cf1" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "6187ca96529abcf8eed2de8fd20779f1" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.66.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "3c38eb30cff2364925b3f09821fb2c02" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.66.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "617dc5ec17fb85b6471abe97d9e968c3" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "aedc46d9003f8edcb2fad530d47c5a6d" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "ff0c3564cf72675610cd2627d50d22c9" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.67.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "3efcd96195444261e380f19ff50aa86f" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "26fd5320992a24ca17e9874a3a141ab3" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.68.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "6bc49599d15fd5b81d9ae70b06e09b1f" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "3fea78a229e473ec8e444c6446b074c4" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "16ef11487b5502125608d6d9a38f522f" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "19132d5546045a0dc217626cdd2cb3a4" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "b9532f28c1e877964c4a9a94684eff86" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "4ab24ec1a23d45cc192ecb533bed316e" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.69.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "80b3407d95e25dbcd61d6de9ae35ff16" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "a905449aa43f20681806d5418bbb8205" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "e9112fcd74ea9aef3e959a4aaf8dfd41" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.70.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "2d956f7096a0568dde5db894a173a38c" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "93507c7ee07577ae8d37e4334a814d2a" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "9482fb1e69c4824ece08739815b2ac89" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "c8d6b418d105720ba53298b6ededc167" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "c2ef14897b21f92ed0836ec3b498394e" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "478115dcb0b35f6054ca2657b9b62365" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "0363310763d08b95488b02d170fd04d5" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "9c6abb84ab78f5b5132f96c7bf9d46d7" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "33641b9418ff72e52dece71ba9914210" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "0182deadb2340103cf3c32360521e442" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "c1157d83e1089aed146ee8e161162be9" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "ec886521770fd48a8b1d772b421188ea" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "c8d11a44a32a710fd3dc7e5ec3d4b184" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.71.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "77b4e85ad3f5c85fdc2a0429aba76d77" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.71.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "b9574b7fdb6a744712bab3579f6771c3" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "62aa54317f5dbc93f85acffe47a8cc9c" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.72.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "28a44296af17df6196357169e9c3d335" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "76820e3741168696cb59f190229ec0c2" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.72.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "584c2fa3157a38f6287fdc79f2c37b95" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "1580a61985e80930cc85637951336d41" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "91122933f2538d6bb2c77695d50eb703" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.73.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "3091ceb295e567b62121715caa113551" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "eb4aa7f60f8ac8ddc15b654ac5cba257" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "c40c604ea9748390681df5b85918286c" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "d47039f41127a5a45e59fbe4dcdde4be" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.74.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "5e8626b12a4b698920fe0979335e3919" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.74.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "55f828c849d4fdc0a6db322a39965691" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "5d56d279feeaac395e1735d084058ce8" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "ba2cd5bbde942dc48f35c0ca2bdd2191" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.75.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "8cc5bfd32eae1b3b8d5e548109910942" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "a19ef8521567a897e7dcdc05b35bf1d2" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "b95437fa43b3cf64d3b385c3e4c53afe" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "e9e8713a01f467806e5c921125e164a8" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.76.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "6527855f7f4fe03894f95759734dd707" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "ec03be3b128e76aaac933c5f9150ea6d" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.77.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "bcaa969b97b83b2a0f361eca17e9fa9f" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c628fd24cbd7f4d21c495441701faf02" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "ebbf1929438d2c9637f743bfdda133b8" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "8ff9a0bba42200a62953e8b96eec6363" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "c690a3aae47275aed3eb1d0b17642962" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "e469b5ea0e700116a4bd992733091339" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.78.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "52fe50dde456e722c2799275884333a7" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "76b44e9df5c493e9cad936becd58541e" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.79.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "a6aa889e86db2f97cb56310373a1d4cd" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "15f5b89dab7af3afb66bcc549df37f54" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 4276224, + "records": [ + { + "name": "model.norm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16384 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 32768 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 49152 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 69632 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 86016 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 102400 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 122880 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 139264 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 155648 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 176128 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 192512 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 208896 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 225280 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 245760 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 262144 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 278528 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 299008 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 315392 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 331776 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 352256 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 368640 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 385024 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 405504 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 421888 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 438272 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 458752 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 475136 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 491520 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 512000 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 528384 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 544768 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 565248 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 581632 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 598016 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 618496 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 634880 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 651264 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 671744 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 688128 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 704512 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 724992 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 741376 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 757760 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 778240 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 794624 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 811008 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 831488 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 847872 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 864256 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 884736 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 901120 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 917504 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 937984 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 954368 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 970752 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 991232 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1007616 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1024000 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1044480 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1060864 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1077248 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1097728 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1114112 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1130496 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1150976 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1167360 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1183744 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1204224 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1220608 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1236992 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1257472 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1273856 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1290240 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1310720 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1327104 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1343488 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1363968 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1380352 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1396736 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1417216 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1433600 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1454080 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1470464 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1486848 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1507328 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1523712 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1540096 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1560576 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1576960 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1593344 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1613824 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1630208 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1646592 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1667072 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1683456 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1699840 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1720320 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1736704 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1753088 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1773568 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1789952 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1806336 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1826816 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1843200 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1859584 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1880064 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1896448 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1912832 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1933312 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1949696 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1966080 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1986560 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2002944 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2019328 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2039808 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2056192 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2072576 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2093056 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2109440 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2125824 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2146304 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2162688 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2179072 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2199552 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2215936 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2232320 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2252800 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2269184 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2285568 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2306048 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2322432 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2338816 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2359296 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2375680 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2392064 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2412544 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2428928 + }, + { + "name": "model.layers.49.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2445312 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2465792 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2482176 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2498560 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2519040 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2535424 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2551808 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2572288 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2588672 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2605056 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2625536 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2641920 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2658304 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2678784 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2695168 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2711552 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2732032 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2748416 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2764800 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2785280 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2801664 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2818048 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2838528 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2854912 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2871296 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2891776 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2908160 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2924544 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2945024 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2961408 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2977792 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2998272 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3014656 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3031040 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3051520 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3067904 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3084288 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3104768 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3121152 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3137536 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3158016 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3174400 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3190784 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3211264 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3227648 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3244032 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3264512 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3280896 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3297280 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3317760 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3334144 + }, + { + "name": "model.layers.64.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3350528 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3371008 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3387392 + }, + { + "name": "model.layers.65.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3403776 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3424256 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3440640 + }, + { + "name": "model.layers.66.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3457024 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3477504 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3493888 + }, + { + "name": "model.layers.67.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3510272 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3530752 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3547136 + }, + { + "name": "model.layers.68.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3563520 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3584000 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3600384 + }, + { + "name": "model.layers.69.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3616768 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3637248 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3653632 + }, + { + "name": "model.layers.70.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3670016 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3690496 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3706880 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3723264 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3743744 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3760128 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3776512 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3796992 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3813376 + }, + { + "name": "model.layers.71.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3829760 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3850240 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3866624 + }, + { + "name": "model.layers.72.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3883008 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3903488 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3919872 + }, + { + "name": "model.layers.73.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3936256 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3956736 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3973120 + }, + { + "name": "model.layers.74.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3989504 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4009984 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4026368 + }, + { + "name": "model.layers.75.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4042752 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4063232 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4079616 + }, + { + "name": "model.layers.76.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4096000 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4116480 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4132864 + }, + { + "name": "model.layers.77.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4149248 + }, + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4169728 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4186112 + }, + { + "name": "model.layers.78.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4202496 + }, + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4222976 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4239360 + }, + { + "name": "model.layers.79.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4255744 + } + ], + "md5sum": "d8213ce437e68a0b6c0ac5fd499a2158" + } + ] +} \ No newline at end of file