diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,3137 @@ +{ + "metadata": { + "ParamSize": 195, + "ParamBytes": 13476831232.0, + "BitsPerParam": 16.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 32000, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "186b2dde3a9eff8b8c633031ec403339" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "9f5702372678f645c083f14ca4ab8b95" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "b25a40b92df2f91c8dab2b518d912483" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b8059a4c1032e3f08b639911c8d1bcaf" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "00ca5cc5d7f19a572ad06577dea72aa1" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "f2e3c0b8ecf774e3ab6bb93e9718ea5e" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "dd7fc43f91c71b5146a1e6bdfc2e3a5e" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "33f0e7a6adc631cf94e4e437f69f6112" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "dcea29c22fcd157154d2b4079de93688" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "b77fbe54dab9017a04fe1f78b4d4f6c7" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "1e2af700acc5afbc2189c76b8ebfdfc8" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "800f8575ef1f2f4930836ad11efeba49" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ff6f73dd5579654b2989d5c37a1339ef" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "937434cf707eb9cd73fd922f0f325cf2" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "928a2b739bb6b01cf2313511ee148341" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "050e8141aa544a2040c48259965afc7d" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5eb6304dfb4140e682858feeccdfabdb" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "ce3b6112d41ab8bf1c3a5e4ace0e6352" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "377a997d33ac2896f81dc5d5a964b977" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "688588e6a832ea7b3097204742647115" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4c83d99bddc657f4d0e44e3a898dedf1" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "03d1ee0d8374573cdb9376274fa7ad7f" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "d087b6d2478ff0e32935e8773e96fba2" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f0ab56d3e1578029f045ad11421401cd" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3b05d4831d3f8530701b60ec41566987" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "bd353da6a1d684d3cc55c604b81cd110" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "f471f02f08878938fa0c64b6e5915c21" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b5aae30cf84eb26a1c1d20f46ec6a463" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b8eab0a2c266c964b3b81ec108cef7e1" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "02c6ef4c11a83509a9172dd3c1c99e0b" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "23320f0b33062d540805ece08838b5c5" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "2a31b3425ffb0032f858a98776b06e46" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "586254277890d975e948864f90646e73" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 32000, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "925c0893977c37bb2a47e5223c82a187" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "139abb4fee646c9313daf0db69c5721a" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "cd07340ca2d9fb4796a774ed34b5cc48" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "74b49f4390e004ee9d7c96cc5593f9c6" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5aedbdc23b2f98d088284c36c611382c" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "6246e903531b99cd8730c79a46bdcd61" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "19518905955bf449e2642a1bd7282ceb" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "4ce82b7923d147b25b6d2b8eb8d76bae" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6ac57c5cfe0769f22c9811f80be81d9d" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "e70b26b97ec1ceef8bcffe60ff400e35" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "acb899f6f0bc5b6532f2da5d64c5d5ac" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "6f32cf82c9943851974657da42c8f1dc" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9639fc721ba2501512f15e10d920c57b" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "73a18b4973e43be3e7a58f9374d5b812" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "54d17d861248ed4951c43401389d98b7" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "7519b35896097839bb43aff4d8404792" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4be8dbbe84262ee508eb4e3a9bbea15c" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "5dc410d19e52d5c90de98de55d672140" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "d8c9075d8ec92e790e5eff01f99b274c" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "76b04d7c3f00d3c00e9dcc390e2815e0" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c3d2a2854ad6b775b5c8eba072717d92" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "42b9d322d2e610fbef9d4368f60101d5" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "2e7ce52fdf2b9e0e18256cdd373a6699" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "0f2898024c308b6dde39aa2d9a4764b7" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "26574c3a8a2e2deaf6bb32bd355db8a1" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "3fc5a2fba13eb841fc2717a875b1d4fe" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "051621f8886973f9f34771c3a0e16107" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "05f75b303c38eaf4ba979ed82da80ecf" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "53d18374a4be61951af774622b5ae892" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "effa2712df3c219b3ab276f9e9f036d1" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "ccb4114f69b23b7ae6e4e4f554aa7fc1" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "9e1262a0d185e5b4c1ee9d677a818da2" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e5e169ce7384d6c7fc41b794509aad6d" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "6edf9342b24e466210ea739e26f97417" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "39585337bfd8c9aec33483a972b31e00" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "28d2bbc4c89576cb4a98bcf2d53e9316" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "49d6915c712251c5b5cb1f3ddd48cbdb" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "7f80f02ac990f82a5e8ed162f247963b" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "0cfddbcc68a932dbc336a932c8873ef5" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "7fcec4f1a425a90f7ca05f9d068a61d5" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "73994554e4e2f4113ed3a7a1164d16fb" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "8734ab45e088f6b8e3e765e1da038775" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "438a3c7505a9074757c96eb88ab89213" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d8b2aed10b4be15c4093cdd97de636dd" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f82a26ad8b7e22bcd538bb864703b406" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "ecf514b12a5a9ddc230dc548ffdaa603" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "acd549fcfc786e53d24afc8b4e5a6d4d" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "eef00277b6bd5876efadf044e56d0118" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a3ed124ae0badee2a25716ab14e5ed5f" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "3fdab346029dc00d91c89ee966ada7ac" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "0c3ad6da8b1939f1a219721cf3021075" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "64518fd75c3d937345ac903702590e5e" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a5175c66fc31221d48f8bed7e1f4120a" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "03be79df00498e93947bff1c23aa8cff" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "6586b260e80d765a9389eef4a6941276" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "994d22374218d153683a2029f136a986" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "60ddc9931ebde1a12219700024491604" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "00388e537d60a1fb821f6841bc8b5639" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "d871fb17b4a3200a4593c40ab8d782ca" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "393740e9576977f027df7ce907ccfe05" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0d3b3dfc843f5fbb09033c2081e525a0" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "0d8c8a054c5692aad1643b85c2629507" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "5143da8850d2cb0eadde68dc8cb7e0bb" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d06fcb20449271a615715fe33757bd6f" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1ade4b5c58afa1328382e99a42f5eeb6" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "6ba0523f841f07024ec1b646bef0eede" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "d949c1ea9831bec47865102f294f8619" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c5c1c4b773ae666c69ec38b953ee5818" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a0ab8dcc94dd030b543865a18fd3bc57" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "210032bc66a9e7a0c37f8b91552de3d1" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "284e8bb0dc3b1d72c4afdcd67f3107f8" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "5db9ac1f94df3e1c18a1bdcdac66aca1" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0eb9891df107199167b848676d683396" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "7e280ece8b9c816b37e7c1bf1c120653" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "953ed357ad9bd6bc8919dd43171c63f9" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "7a0311823dd93aabcde107b3ca2e9f92" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f5c6c5ef3f38c57470f08d0d16f1254d" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "c121142ea0d944c240fea1e403536131" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "2c32c4951a08f99c2817d65c210225cb" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "95f29d86f877c14b1e8f0da88a7752c4" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9defcc877964c34610caecbebc740d9a" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "cd0969d69201c60bacbea5fe4e78aa80" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "151d80607eac81f28cd68615568e541d" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "8fc5408952cbbb3489c54aeffe0fb6bb" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "43d47223abe837632cd80c5c452ed28e" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "9b0f0b8d7db7cbf5b55da3c91e771980" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "a4e4832fb8a91948687e8e25b49e02f7" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d9b461527be7736fb9bfda51be7fc111" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c3cb30016490db46d59200a79a2d08c1" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "36ac74c4772f93f4470a2d3d4d52d2f2" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "540c6b7bf2a00f9f779b90b9baa4d2a0" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "6329b273e5e36d567239ea537c04038b" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e46625a1270db315a78356a6db71641b" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 4096, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "11ceb698e222607ff0748fc38962e2ea" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "af80ab267c90cf546b6c4d933b633a40" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.weight", + "shape": [ + 12288, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "2114932a725f6e253a45ea3afdc0df6e" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c2c9460d521a25c3972a3fa060015b03" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 532480, + "records": [ + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8192 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16384 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24576 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32768 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 40960 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 49152 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 57344 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 65536 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 73728 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 81920 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 90112 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 98304 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 106496 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 114688 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 122880 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 131072 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 139264 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 147456 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 155648 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 163840 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 172032 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 180224 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 188416 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 196608 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 204800 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 212992 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 221184 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 229376 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 237568 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 245760 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 253952 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 262144 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 270336 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 278528 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 286720 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 294912 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 303104 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 311296 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 319488 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 327680 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 335872 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 344064 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 352256 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 360448 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 368640 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 376832 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 385024 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 393216 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 401408 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 409600 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 417792 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 425984 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 434176 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 442368 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 450560 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 458752 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 466944 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 475136 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 483328 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 491520 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 499712 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 507904 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 516096 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 524288 + } + ], + "md5sum": "faeba979734e080f140a8e908a9a22a6" + } + ] +} \ No newline at end of file