{ "metadata": { "ParamSize": 483, "ParamBytes": 141107412992.0, "BitsPerParam": 15.488057747664403 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 2101346304, "records": [ { "name": "lm_head.weight", "shape": [ 128256, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2101346304, "byteOffset": 0 } ], "md5sum": "b450ad6564dd69ce196a7a66a8c66182" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 2101346304, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 128256, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2101346304, "byteOffset": 0 } ], "md5sum": "309be16492736a951c706ce5cd988ca1" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "2cb5c4a21b1100f7617057559d92fee8" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "e80a43658a41c118d96ccd16ffd291de" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "0b2feb540fd1882e85b2a4fb776f87ff" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "3c7d5c34217caf212ae28c4e8e2d5505" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "db59443e69ffedfbe49b3619c2acca69" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "1db3d5083d7fe715600829d295c44147" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "891842995e082f022c0486826dfafbb2" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "7062e3934fe98a31319efdd72de2e72c" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "ad9cac3f335c84c68f82819ea0875ec2" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "cf10d660f4c003d1c9f651c9af4f4e97" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "68e217d1bf72cbc715e00fccf0d641b6" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "5b1e6c8d28f7b002a01603d1f4fc4fb4" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "03d4b32b164aec53013de848e716bf1f" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "29b63b38be7729def6e34fcc08e929e9" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "9042a7dd8258c61d0d4addd6b231424f" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "86aa510da41fcaf32f018794e00a7129" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "9dc9de2e57ca3e055c89c1a49fef73fe" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "180d94e0b23f1850ec6cb1f02b2f3e4e" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "8b5700ae5038078ddb168b163fd9a2ce" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "0c4b7950c3aad4fddf760cdf50c80af0" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "cd824ad2b43e598089f95ae5fb141849" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "e55469e4899ffcc76e2b4fac554e988a" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "c25bfaec98e2eeb604efe207496383a9" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "e1234baa895bad3d0ea5a260b8280844" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "17941bfbfbba34d7fed24f4f52bb16eb" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "42f7a32155912400c0a776db5194151a" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "fac2459cf70fd4a08325c328093c7ff7" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "d19c1bc119550814be8f46adf199986f" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "9f371537d9af9742e1ef1f0d29f02f72" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "78ace93ff6a3c994bd5d23832c512264" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "a474604d82b253969a05c2debc9df0a6" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "6fc33d33d6de1b39bc63902b5e033980" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "ea19868ade02bc274b0a254486f95afe" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "34331b0a17cada3a50bf866d1f833fc0" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "c323f2a221c26e990bc1c0fec0666bfd" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "581883703f2cde41487470242bfe54fa" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "2b3b5ac77933f403cdde76302ecb677d" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "1425099cd51aec528fbabf32c5fdcf74" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "b1cd012d8bc7d83c472c0c7b8c07bb36" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "5cd866fbda260b756da43252a254b176" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "52ce133a25f605afdb9a0afcaa983acc" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "a5a88bdfe9cae14c9a8997898200d686" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "a1490835117f9a2cc7b81715d0e21e4d" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "c5124d0666f07168033b8762643cb662" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "edaa47053708c6094ef2609198dfa66b" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "d748e3ba5c8a7f5e32720ce11eb780c3" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "293fbdc4b9e6fff83eceb8c5a8a30bc4" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "70fe96145b892e1d92481dc5b000cf3e" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "61055f94d18cd60bb0af0d6b1cb46271" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "126a6960219567292976394ed7592e6b" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "af6838affd95f4f4ad0bfb1d175829b0" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "168044f9fba336747686104ee40c7028" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "db279a7cc50e30019324140e499d0218" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "9204896674dca8d0eb0ac5d041417455" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "517cc801c3affb8768dfa82f6f3a8f3e" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "cc9c0e3b91c4760e547982a965e34826" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "726057a5f0c1c07ef02ac66439c93af8" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "a0172e472e1fa0a61fff7fd3db0aaf9b" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "446db73c1d7fba669d1319a5c657e50f" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "134f02e32d2e090d12482218930aaf9b" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "69cf0d34e94b7b434f5b8a0b0c9ba190" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "ccce553e65a3050b75371527589d2ade" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "e09e09de276a1ec7bddc438ec63f2a00" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "b3e304343f04493328eecb9a7d832131" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "8b6446cec0cc8a42df38b3c09765ae4a" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "b6c1eb5fbd9a9327a259882c58b62f79" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "0bdfe9e131883c645f940afa1885c5c6" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "4f8ccd81d890215cfff7ab5e73f3c49d" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "6137cf8fce22bd4c670949794141495d" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "a9d143f3c1371ca707234ce025508e40" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "0233824c7ac42a1862ebf569177b0867" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "d0bf5a4af0537f4e06adcd22f032ff25" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "142cf058885af2436ff9249031c17ef5" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "be17e7cc7313810440382bf684c0fe11" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "c6230383b3e0facba7fe9ccd750709f7" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "ae63945408806697798f0a5fd524c0d7" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "dc85b24a0c45f8799bb9db35e48714b0" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "448065abafcf7adff769c369250f33f1" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "e1e66ddc841e95558165751d82882914" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "f3ebecc044b5e93ee8c050bc428f6671" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "2b4f5119d85362e74ad9cd74a3188b9e" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "32728f93c8194cbbb41fa28aebf6d05c" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "359d0cae0903aa8409da4d3e08e0a30b" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "0b74770787a10808570fbdcf535f2c1c" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "e6980bee64be55eff5d7c2928065f9d4" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "998d65ce46680604898fb20285bfbaeb" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "70525719fac37d427cd3851465f36108" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "eb1ebace6b5114ea43749c2e237ce036" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "69b6f3a71313ae54124fa5efc48e0c99" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "8076fdef056a2158e3adbd375ff01d75" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.28.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "4e606a5eaa9aa09b406fcf87458007ef" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "58def0e4b35957a1c3518295748bf498" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "0ebbf11ae61883ddd13a04f73dd2728e" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "ba554599d531b2c19d1fa84ae99c66c7" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "4ca475b57606500716569f598b374b4c" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "5f147de735a92d4c174ae235068460ed" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "005bc79261483a247e93afc09f976c10" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "f5a4658f88cc543bf13fffd91cf280c6" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "d1216ecd83cfd749a41e701bb0b15947" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "e794ad1c68505503900ab9aa75ab363d" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "b77e881ac3753a56e33033fbed1c3334" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "4d8b42b9f6659044d8f50df65a456123" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.31.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "a6c01d2ffbbb07a51ec1409eed58707c" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "47ff7a9d69e759eef338877d373b5a2e" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "bea5ad888447e0ed569b8cf632455e72" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "3ea2c60297e93e78378aed3e457de3e2" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "908cb73543c5e3edf04762513a52fbbb" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.32.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "0ab6d75a50a793db4abcf82608c8096f" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.32.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "e3eea84d327257e825b3fd2c87e942c1" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "fa2cdfa870f322df2d7747f3bc652f5c" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.33.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "19cb5983f4fa3b8f2fa0434ee2202af0" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "9e2f7741ff17673f164e93e13aecb481" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "04fb12ef56d0e8ffce1bf6486ce4273f" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.33.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "20ddb0f9db244ad6eadc2947bfb2e043" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.34.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "740e67529bbdaac18357f5d63916c7d2" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "87bc57edbbb2dd3d4666996d373e4c0e" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "c26cee960fbff48b45b339046f4772fb" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.34.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "4c7d1bd19e59331c92134aca1d4cab6f" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "961318b26a804ef1406fc0c9794596f4" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.35.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "9da61f83c95db5cee0bc8f45b6922f76" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "5d90d625c57b47ce6868c976e83e1366" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.35.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "dd48bcdfc913167ca7adf262d73899f8" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.36.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "168f475ef418a849362f0a2394297ae8" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "7781a67ed190b390595fc0581df374f5" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "f2a2bd8781ed0fd741c8b79525efd703" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.36.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "8fe50b164c7312a4cb68c83d7b16670f" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.37.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "f3bd589c858b5d95c4cc631f1377465f" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "a817e74d0ea7a4d7367f8faa29c2a025" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "490795964010fed0d085f59622c2a390" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.37.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "e2a81c82c986b155cf824efde8726af4" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.38.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "97e28086a7ca6a23d792798f865307de" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "c82ff26bb8d2cc19f236440979043732" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "d2dd23cc0b46ffffa73b605870bfba50" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.38.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "42a7991e9385b2dbf201471aa4350b63" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.39.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "6ac2a6cb1dbd8aa831e5f4887d6eb567" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "0b9af05586b6542037f79c499cddb356" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "81a1202b54eec1c38eab6404f48542d7" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.39.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "593fa5a5743e94e46bd08731b2d54cbc" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "2a8d75517280240cba46735a8dc4df67" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "8b6cc23f03056a77a11c76728cc5e19b" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.40.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "edf992018f76484dde8532e131be105d" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "c74f8a7d21d867031cc37b8b723edc4f" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "835afd5769cdd1b85f41e9c7e8068e18" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "6ee621d66cbfcfe4200c14199aad6b4a" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "d9c9b09a40bef69f0cb11a4abb4d93da" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "db7f5c5bd0c6fb4d5ea288c438223894" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "91b269b12a9ddb66d584d8f55044534c" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "323d8a162da585ba72022f693c2b4fac" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "9b0296701a4e793a693704d8d5009211" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "b9b006402e97cd8ff456cdda77a2d1cb" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "710754858ec9e88a4153ab3515498ed8" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "d22cb295dbe6a4c33987083fb13dee0f" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.40.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "7e4db1e5344a5b9f562fb25e255d0a1e" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.41.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "3fb77dc6a5c3786a91aaaa039abe7599" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "cfc73ab4957d0376aace0146eb6712d1" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "120210a2f8520e8c63be318d33a8e698" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.41.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "b0ce93712c75c7a3a5319966243d073b" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.42.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "3d2071f8ddc314b566102cf3327d2ea1" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "3739abb94ffd0d3eba40d821fcc7e9a4" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "64d390db31d61a33d00d0b28066e3e33" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.42.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "803dcfde39905abeb829cbdb7ebadbcd" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "6734c5ec287042654a43f272b2ee1063" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "b960863fc4880462a993ee36547727e4" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.43.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "b06f1798ce6211bb871a37306a2fee72" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.43.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "2ced5b5bb2474fe3118df118cb81b29d" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.44.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "cc77c8dc5a03097d34f032d6beac80af" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "2cdab1fe48d919de864536ce6b5a9daa" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "9b826e82ec4ad3019ab5e0cbb10ca5eb" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.44.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "0becd9873ca627e011d4f4aa94e9ca08" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.45.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "8a54a07c40066ac2d69d2f29bbb6d919" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "cc789719e26ce1da9e0b3513457cf672" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "cc8ec9872e82aeb474461867df9523eb" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.45.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "5b9e87e696a832633bf3f82d471d8333" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "ad1e9201c031ead7b080aa64ac11076e" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.46.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "3ffaea1978340959b351a82490dfa9be" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.46.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "b22d47a40ab25e6ff9d3b2207869455f" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "a18c5dbea68385f90849066afbded762" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.47.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "3f02d8cbf238e2678db55858c0453ef5" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "e3597ba2482247cd0e2942dcb20e98a7" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "b888a9ed92a1f8ccc6b086f11984b5ba" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.47.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "f54b6617e2e9796cbcd1fedad1b6f25c" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.48.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "76f4d65c7be9950b86d3e62b653963d0" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "a43fcf75f9871b99f303052539d7b712" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "93d64ccda1b108a8f5f804075d7f8d51" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.48.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "393c7cf6ccbc03800ce9cd8fc46710fc" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "675e7a6bf2fe33f348cd4c79d725d0e3" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.49.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "24abd7e9258e2c2b0fa86258d292fa14" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "33a86d35d23047aaeeee6a1f975d30a5" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.49.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "e2c9dc6faa0ca0ae8be0c7931a50aec1" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.50.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "408213d97b52e956eace132fc588576c" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "3ef6c59fe2d596fa928e7c9e6df8c7c9" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "a2831459a12c4e8cf7cef061196c6f12" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.50.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "e3ea98974471551c3278acecbd868d29" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.51.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "744d95db7bac0cbe8a5bf62213ad2931" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "b5ee11848efe9da2f1269a97e78576e9" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "ee11ad4ad88b94b6849de0cecbea74cd" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.51.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "948eb2d6293db3008dd489b4c8cf88ed" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.52.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "14253d2f843db695e265c49376d34110" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "854166d750a5821430cd0d5ef20f9d7d" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "3a0f33389e9a578954bb9bf1cc8d67ce" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.52.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "6331423acc6b4eb350f23e0cee38e4f5" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.53.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "a3768ea70cc8c780d331a9e5f54cd951" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "fb8217a9d6bac8261fd576bd7c31dcc0" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "5e8b56d693150d0198952c9eb540f7dc" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.53.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "7b0212a148b449b94397176957c65d78" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "7f847dd631be283bf4f8dc9173762698" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "0c207a899bcd2bc7b88ab376637b6a95" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.54.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "8c7b5f4315e0ae1167d7e2bca5105e99" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.54.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "ddf2f5b7c61925db5718ed6ee69ec994" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.55.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "86b71a9eeef27558569d0e586da2f97f" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "919bcf200aded9489b10f5c906b02c65" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "fa6bd143e64ebe01240487ea2f26c72b" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.55.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "2d05fd09f7dde2d75f596cc01caf4f69" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.56.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "0d1033aa80a255c54902dbf7e96aa2e5" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "f5c80d483779fca3e693aebb52b08348" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "be32f2b8619e2a0171f9cfbe1f1fa417" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.56.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "8a939ead7f046c5d2356a80e2ffa9da6" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "b1b4da1d338ab3cd983ca1fd5ab9490c" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "ed1ceb59121380ca96c55e8e0d9b5082" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.57.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "160656e3abcc904ffffd56c49af70cf0" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.57.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "2ef00573a6b5c290e48e1c43b9b81272" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.58.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "6ef76a299baee2564fc0f49c0f20fbb8" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "d7da2d1c123989319ea9b8a449cae714" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "7eeae7b3f56e87edd124809d80f73a9e" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.58.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "7a2dbbe5d1182db9db10256eb78c0b81" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.59.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "c165acfc400164cffe6cb7b7a71c4428" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "f1a08a740c6ab0d7aaf21a0803ee443c" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "50ef27b4c9cdfe83d53971a98bc16b7e" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.59.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "bdb9fb1c83e037201850e70929419c80" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "e68c5ad8a7e0d5c0219071425f4c396b" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.60.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "b65ac76351d78768bf097b10f9e39a17" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.60.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "1668600427d3fbb8841f1923985219f0" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "b76af8c7730798816e215d42e6e94acd" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.61.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "e8d23fc1776f73cbc6c1613238fb8d09" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "3edf7d13b1d320a6d7790cb1940808d0" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "a13212cadd1294ca81b533f83cd7e530" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.61.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "a4325a0af6123702e5288ed152f2a62e" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.62.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "fa13e69be32564d744b1e55b22855f91" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "614c0ff1b5f0d87e8f60583aefb9078f" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "d404597689ac49dba02901f93139c015" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.62.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "b54e3127f5a365bbb2429785530c781a" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "1d6d973ab7b26a03d8e611c8c5ab20c7" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.63.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "abcb3d454c0b457256f8f84c6ac62f58" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "98ceed2e8c16f53a0765ac0a435644b9" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.63.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "b38901dacef3e6973dad7f8fa5035146" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.64.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "409ee390fc8d891684d75ede5f2874dd" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "ac0015df8239dd6f3cd0e4ba9ea1f677" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "4ec3523fcf468b7961e1e5c5bdb6c6ff" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.64.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "4ccff36a6593775edf5829ccbeccfb81" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.65.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "0e8f6f1f5742e78c5d11f0a7a00b0f1a" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "70c5f17c8852ee90b50ae6447809ab7a" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "e7cdd7613d7cfe96e532432794ac8a25" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.65.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "c31bfb7fc227102dd5ad83af91a31e97" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.66.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "f5372ac190601baca940a00a4fbba04e" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "a98c14ebe9a03e1396599b5362ca8144" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "2aa543cfff14b2514aa81a764e0b00ed" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.66.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "4f72b7845c50974c1c0ccb005c617c3e" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.67.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "7aa23547c7af0968c550b4511c046472" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "0df3b2bee5cd58b107a0c0b3da6df569" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "7b5e316fe1c14ca5ddff0f2a98f7d0b7" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.67.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "249bd2760d2c3a31b732f294c7f19808" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "4d83a996d06de3e0ea1f11b438d88a37" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "df5166fb0cb3cae8914015de4a9da831" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.68.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "9e673e81a08fb861c5cd0e7f7f34dbce" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.68.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "c1bc0b00d92fa135ebc0d59aee0163de" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.69.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "2131311b0380c866ee3374999471f958" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "02f092235ae33688b680c46d4ed43327" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "9966535166f0af9c8b9e09f1e3d7299e" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.69.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "bbe16bec11eb73c93a6b8b97ea4d714b" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.70.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "5a2a07f6a85797d16d9f5839486259c3" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "6f0299e2999fa4afb5ecd36309ad54a5" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "abfecb20f5dad97b289dc9cb54621b4a" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.70.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "3465b662f6e86149cef0c564fd4742fc" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "82f2efbb27c36ce681f5a5013f68c1ec" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "dedf45a5dd9be18311708740ede905cc" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.71.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "53275766fded9d128b899a629a4875da" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "a5036e6cd3f15329db03af135e8171a9" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "c49efb0555a7cbd3da2c503bfca2aa20" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "cc95a6404df0389d24519a405c538824" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "62e250e08db34cceea3359552f3cf23d" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "904ab0fcc620328cd99cead293cd706d" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "84686963b810316182995c335f176256" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "9161bc80e0c5cd4cce23e325268f0e1d" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "ad4e5dff6f53012004006c6bfeb72a9c" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "bd5d90e2fee07d15dc6a79ea7d484b5f" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "0ef8c6d297d5557017405eb3e58ed078" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "946464d1512ef49dad5ea445dbf338c6" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.71.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "b5f3606c48cc9ccf4c817688ddcf0fa8" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.72.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "b8269384923f95cd64c58dd269067eb1" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "f3ce9dd8c045029d5e7777cd086cdab0" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "91f85dd58c4dcd7c598360adb9432eca" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.72.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "f0da9b57def2ac0eb78ed2ba552adc03" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.73.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "03c79ff59e26c3503caed45543d4456c" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "4103ca3b9d4b89d97170b1ab47727a37" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "a24a30e499c7785aebc8b27b560afa0c" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.73.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "7894817b059b4ce0b028f1042d2248e5" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "cd3d9ecad1d925aaf704637b4675f4ca" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.74.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "0b5979089a7c03d1e41481514e0f3b50" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.74.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "cd596e34fa974adbc4ca1fc077612950" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "43009014b3ad8a4757404d6b77c95c5f" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.75.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "28a92ca2327618004a2d809afbd6c9f1" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "4e7c133164d7f0634d2ba1be8d93beee" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "e841811479b056d34d2e95ea201c201f" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.75.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "2262dfab3b6ef09e0cce14974baefd70" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.76.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "6c36fa512f16d3b59dcac6ee7783705d" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "eefb0c192627c4c38b531b37b9d69073" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "0e5bff850566947e3a528b4512db3266" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.76.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "354b857f8cb54c70b042ccd15cbd4f8d" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "08502002a4102016f5cac354c300450c" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.77.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "3424cd0cb699cadb80845b34ee4f2f72" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "615915194099bbe6cf157d73118fa508" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.77.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "2a9ba9e5139c9e199511385e5acd6d63" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.78.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "7f278a6cd1a299581f846a7d1a5595c4" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "cbb9c0b5c527fd870738aa346c48d4c2" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "833aeee391c6c5e203b4d7977ec17fa6" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.78.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "2fe5f419a4037d1506dfa5bdb84e9e95" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.79.mlp.down_proj.weight", "shape": [ 8192, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "8c240bc352be9adec7c1d856c24a5817" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 939524096, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.weight", "shape": [ 57344, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 939524096, "byteOffset": 0 } ], "md5sum": "beb6453384c1bd6a71e70425eb51f72c" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "ff6128c65860c12d47817d308bd6c1fe" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.79.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "73cc63238def24a578444e6fecf66e9c" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 2637824, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16384 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32768 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 49152 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 65536 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 81920 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 98304 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 114688 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 131072 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 147456 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 163840 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 180224 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 196608 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 212992 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 229376 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 245760 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 262144 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 278528 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 294912 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 311296 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 327680 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 344064 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 360448 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 376832 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 393216 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 409600 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 425984 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 442368 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 458752 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 475136 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 491520 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 507904 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 524288 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 540672 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 557056 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 573440 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 589824 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 606208 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 622592 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 638976 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 655360 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 671744 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 688128 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 704512 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 720896 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 737280 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 753664 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 770048 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 786432 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 802816 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 819200 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 835584 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 851968 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 868352 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 884736 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 901120 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 917504 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 933888 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 950272 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 966656 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 983040 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 999424 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1015808 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1032192 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1048576 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1064960 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1081344 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1097728 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1114112 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1130496 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1146880 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1163264 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1179648 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1196032 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1212416 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1228800 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1245184 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1261568 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1277952 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1294336 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1310720 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1327104 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1343488 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1359872 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1376256 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1392640 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1409024 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1425408 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1441792 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1458176 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1474560 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1490944 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1507328 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1523712 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1540096 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1556480 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1572864 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1589248 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1605632 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1622016 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1638400 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1654784 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1671168 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1687552 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1703936 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1720320 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1736704 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1753088 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1769472 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1785856 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1802240 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1818624 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1835008 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1851392 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1867776 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1884160 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1900544 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1916928 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1933312 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1949696 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1966080 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1982464 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1998848 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2015232 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2031616 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2048000 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2064384 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2080768 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2097152 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2113536 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2129920 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2146304 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2162688 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2179072 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2195456 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2211840 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2228224 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2244608 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2260992 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2277376 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2293760 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2310144 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2326528 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2342912 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2375680 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2392064 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2408448 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2424832 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2441216 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2457600 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2473984 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2490368 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2506752 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2523136 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2539520 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2555904 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2572288 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2588672 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2605056 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2621440 } ], "md5sum": "bf2d823f168ed9325e816818a76b0baa" } ] }