diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4785 @@ +{ + "metadata": { + "ParamSize": 399, + "ParamBytes": 1929527296.0, + "BitsPerParam": 5.002114406234114 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 155582464, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 151936, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 155582464, + "byteOffset": 0 + } + ], + "md5sum": "fda9f3ad990f79e4ad5ce7cbf9bba25a" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "5c172bee39132457d744d1409f75812c" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 32133120, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 151936, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 19447808, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 19447808 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 19451904 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 30724096 + } + ], + "md5sum": "40fb89a389eb44e825a12b285ff09ca9" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "d271d6fb298743cc26f5c98aeadeecbf" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 28960768, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 2822144 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 2827264 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 5448704 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 5776384 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 7873536 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8135680 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 8139776 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 19411968 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 20820992 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 23639040 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 23643136 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 23648256 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 26269696 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26597376 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28694528 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28956672 + } + ], + "md5sum": "ff76cfb2273c800110970be6edc03288" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "3e4c7c1dc71a952aa6c8b6be782bd683" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "82a9e469bf342e2a4e5e281a76569700" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 33502208, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 11272192 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12681216 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15499264 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 15503360 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 15508480 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 18129920 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18457600 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 20554752 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20816896 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 20820992 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 32093184 + } + ], + "md5sum": "864d39baf7b70086bf783ea9842fbdfa" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "e9721e4de2bbfe51c8b5e979d1fee9de" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 28960768, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 2822144 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 2827264 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 5448704 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 5776384 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 7873536 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8135680 + }, + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 8139776 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 19411968 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 20820992 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 23639040 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 23643136 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 23648256 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 26269696 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26597376 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28694528 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28956672 + } + ], + "md5sum": "60b8ce1c2affed1e51f4d5b157208108" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "d83d3d51deb5106f6383cadc44670bbe" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "a6923c0e9b15a5d6a72c7de8263aeae4" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 33502208, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 11272192 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12681216 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15499264 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 15503360 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 15508480 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 18129920 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18457600 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 20554752 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20816896 + }, + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 20820992 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 32093184 + } + ], + "md5sum": "068cbc345de167ccddbe9fabed1930e4" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "00d3d8622dce76de970fb45064877c36" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 28960768, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 2822144 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 2827264 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 5448704 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 5776384 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 7873536 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8135680 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 8139776 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 19411968 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 20820992 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 23639040 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 23643136 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 23648256 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 26269696 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26597376 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28694528 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28956672 + } + ], + "md5sum": "d52c875651ec3054134a80193d022ee1" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "2fba818f33e45537152a123fd027c824" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "df34827a58c58061632f605ae7238627" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 33502208, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 11272192 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12681216 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15499264 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 15503360 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 15508480 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 18129920 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18457600 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 20554752 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20816896 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 20820992 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 32093184 + } + ], + "md5sum": "996ac335a40811d58114f10493e9d1fc" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "df3406fa6b41994c6a88f478b03473cb" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 28960768, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 2822144 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 2827264 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 5448704 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 5776384 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 7873536 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8135680 + }, + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 8139776 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 19411968 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 20820992 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 23639040 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 23643136 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 23648256 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 26269696 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26597376 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28694528 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28956672 + } + ], + "md5sum": "032f55c582305debba0c4ba8ea2ee99a" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "884fc6bd06655525ccfedbce46eb5b1e" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "91a9a6c0f18a080858ebc1a2c12d5125" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 33502208, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 11272192 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12681216 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15499264 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 15503360 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 15508480 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 18129920 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18457600 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 20554752 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20816896 + }, + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 20820992 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 32093184 + } + ], + "md5sum": "34ea1dd0469a8ce3a3af3ddaf9690ce0" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "8f82321929c2ce70fbbc00e5c7ae5131" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 28960768, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 2822144 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 2827264 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 5448704 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 5776384 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 7873536 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8135680 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 8139776 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 19411968 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 20820992 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 23639040 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 23643136 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 23648256 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 26269696 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26597376 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28694528 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28956672 + } + ], + "md5sum": "59f0808eb4c73f9c3fc3d141da9786d4" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "42e630fd4b00eebd399529140e38841e" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "981fa8927ffed2115bda0649367a3c7a" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 33502208, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 11272192 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12681216 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15499264 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 15503360 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 15508480 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 18129920 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18457600 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 20554752 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20816896 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 20820992 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 32093184 + } + ], + "md5sum": "99df784b181d61de2e5213ab3a745a72" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "bdcdb7ed62905a50a549fcf2b71e314f" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 28960768, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 2822144 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 2827264 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 5448704 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 5776384 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 7873536 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8135680 + }, + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 8139776 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 19411968 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 20820992 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 23639040 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 23643136 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 23648256 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 26269696 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26597376 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28694528 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28956672 + } + ], + "md5sum": "e69d03292e1378658ba95d9027e75acb" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "24c0e4624fafed3b6e80afde314ed9ae" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "fcacfb0345c00b4794974d399b99b398" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 33502208, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 11272192 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12681216 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15499264 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 15503360 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 15508480 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 18129920 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18457600 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 20554752 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20816896 + }, + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 20820992 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 32093184 + } + ], + "md5sum": "0058cdbcf2f29f0c8c9008cf03d764d6" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "b47b30cd98c2fa5770a54173a08365db" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 28960768, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 2822144 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 2827264 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 5448704 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 5776384 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 7873536 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8135680 + }, + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 8139776 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 19411968 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 20820992 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 23639040 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 23643136 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 23648256 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 26269696 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26597376 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28694528 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28956672 + } + ], + "md5sum": "a1c0d2af51d815ac854e394f7d2ec327" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "39fff054623676653b7ddd633bb6adcc" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 26134528, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 11272192 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12681216 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15499264 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 15503360 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 15508480 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 18129920 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18457600 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 20554752 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 20816896 + }, + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 20822016 + }, + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 23443456 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23771136 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 25868288 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 26130432 + } + ], + "md5sum": "1fffc6d27d9cc240b10ce8b091d1ea5c" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "2ff05aea6a7cd0ca213c564c07125349" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "247ac5e71630a67eaf548697af2f2f2f" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 33502208, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 11272192 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12681216 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15499264 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 15503360 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 15508480 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 18129920 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18457600 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 20554752 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20816896 + }, + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 20820992 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 32093184 + } + ], + "md5sum": "a42ef72dddbed4b073772f6d4f08186c" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "77ac5a82c20ad02b596e252097e91a59" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 28960768, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 2822144 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 2827264 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 5448704 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 5776384 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 7873536 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8135680 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 8139776 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 19411968 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 20820992 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 23639040 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 23643136 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 23648256 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 26269696 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26597376 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28694528 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28956672 + } + ], + "md5sum": "2e399ab5cff08b122e0457ec24f700d7" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "831e2782610ec294352d7514b2d26d30" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "03e5e698f8d856bbb4fc0b2927467783" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 33502208, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 11272192 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12681216 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15499264 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 15503360 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 15508480 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 18129920 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18457600 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 20554752 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20816896 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 20820992 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 32093184 + } + ], + "md5sum": "528cae6e9d10092b4c5069892d14791a" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "508752b8e333419aaa747d3a432609f5" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 28960768, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 2822144 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 2827264 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 5448704 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 5776384 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 7873536 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8135680 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 8139776 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 19411968 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 20820992 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 23639040 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 23643136 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 23648256 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 26269696 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26597376 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28694528 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28956672 + } + ], + "md5sum": "8d6006a33d90beba0e5cdb6566469608" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "e7c6f4abb3db0d8b0ee443db9ae65974" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "dd6433cd537d2fe52b48441c8cf1f419" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 33502208, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 11272192 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12681216 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15499264 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 15503360 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 15508480 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 18129920 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18457600 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 20554752 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20816896 + }, + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 20820992 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 32093184 + } + ], + "md5sum": "af7c5887c7970497bdbe2ff767597eaa" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "8f930b512a824b33651b5a61cdb01dad" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 23647232, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2822144 + }, + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 2826240 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 14098432 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 15507456 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18325504 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18329600 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 18334720 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 20956160 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 21283840 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 23380992 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 23643136 + } + ], + "md5sum": "1d58453be4360a0b7124b09ba9095cd5" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "2b86659a645027cd0bad250a634e554f" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "c3d6b80a7fbbdbe94853190989851134" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 33502208, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 11272192 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12681216 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15499264 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 15503360 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 15508480 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 18129920 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18457600 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 20554752 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20816896 + }, + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 20820992 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 32093184 + } + ], + "md5sum": "c2096a4c6fcc31fa64fb9a77d6d24431" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "5bacca279f2e2c2b1b8b163efe1b6647" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 28960768, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 2822144 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 2827264 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 5448704 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 5776384 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 7873536 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8135680 + }, + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 8139776 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 19411968 + }, + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 20820992 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 23639040 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 23643136 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 23648256 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 26269696 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26597376 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28694528 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28956672 + } + ], + "md5sum": "107d466099c5f8def3e93087d09a7c79" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "d58f2120bbe70dab7b17e46364fcc5fd" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "02546e437656d4e7929772d5b9a7dab1" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 33502208, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 11272192 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12681216 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 15499264 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 15503360 + }, + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 15508480 + }, + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 18129920 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 18457600 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 20554752 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 20816896 + }, + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 20820992 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 32093184 + } + ], + "md5sum": "4ca6904e1d8525f0f6ba12f5e7966832" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "2cb7a757cceff5e6e5249c9a925f54fe" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 28960768, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 2822144 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 2827264 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 5448704 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 5776384 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 7873536 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8135680 + }, + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 2048, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11272192, + "byteOffset": 8139776 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 2048, + 344 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1409024, + "byteOffset": 19411968 + }, + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 20820992 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 23639040 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 23643136 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 23648256 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 26269696 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 26597376 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 2048, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 28694528 + }, + { + "name": "model.norm.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 28956672 + } + ], + "md5sum": "8bf5c530f740f433a0f3004c9213a85e" + } + ] +} \ No newline at end of file