diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,3529 @@ +{ + "metadata": { + "ParamSize": 283, + "ParamBytes": 1807423488.0, + "BitsPerParam": 4.500626782697164 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 197001216, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 128256, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 197001216, + "byteOffset": 0 + } + ], + "md5sum": "3897af73cdd58ad92272571d1300a477" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 24631296, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 128256, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 24625152, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 24625152 + } + ], + "md5sum": "5ac94bd3abfff44fc23c41e36d66afaa" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ecc48d1526a57c59700a7a00714f2e44" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "e1ca271e067e6656ca4ab36edb1ca6d1" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1c1f6d4dd70274229354544ff522e1fd" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "1b373ddd9beff49be6525e49294a5855" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f22d16eafe23683b74160ccbd1b53287" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "2f7948f056ca95c7827c811b3cfe72b4" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "620ff5954a80756608e5da8e163c9e97" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "b1f2d93d2f26ecf6b14fc86099913227" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ac5f9c3c8d4aaec671be2a51277d3672" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "b27e1e1b1f88e2672e4fbc8c9067d930" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "3fb46147509cd1fd2a6d16cac1f319c5" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "ce1c4a27b33e6e02237f4d4fa8cc87a3" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6eaa627b6c5add6eb644a3ddf70bd87e" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "d1b2de557c5de3f79370980f14791b9e" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "7a076964808c2471bd779b3095591cec" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "4ebcb62c7a0d3a0fa5c902646f1c02c6" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8647182f23e25657e0eef4226c128358" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "c976b6646e8b72ea62bcd07dcd21becf" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e063d2b2913d45840aeac82b6253528e" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "328e006ed4ca26e3a101d07005f619bb" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ed4aea884d7a52bcf738cd8165f07e47" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "f133b6e62a2a80933060b943e68cf6c7" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "bcba670de6bed8d2472429796a8aee7c" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "66c78cf7715273c5a41873d4261d19c1" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b41b91914f85719aa35b99c141d87d48" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "089f7ce8f169578db28a929212269ceb" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 31463424, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + } + ], + "md5sum": "1db4c69f3658a4875a58d164db882eb2" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "5ffe8a30519558d7fc43015ff3a7722a" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 31463424, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 3145728 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 11010048 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 11993088 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 16711680 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 17307648 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 29890560 + } + ], + "md5sum": "4ddf2fdda491d56c5473498b08fb015b" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "eb4e7a9e9256852414a565e5f7075440" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 3145728 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 3151872 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 11016192 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 11999232 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 16717824 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17307648 + }, + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 17313792 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 29896704 + } + ], + "md5sum": "7ac9e3430016c6b088a3ed27541a6db8" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "66e6ed9813c385d620a4f960c527a627" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 3145728 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 3151872 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 11016192 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 11999232 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 16717824 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17307648 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 17313792 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 29896704 + } + ], + "md5sum": "12f8da0ff107fd5c187ca4bafa48cbd6" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8cd1567599aa15618c506d9e560d045f" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 3145728 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 3151872 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 11016192 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 11999232 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 16717824 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17307648 + }, + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 17313792 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 29896704 + } + ], + "md5sum": "23329609d4a0b9ca1bdd47c5232eea1b" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "2ad30c53be807a6cca1a48d938652e09" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 3145728 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 3151872 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 11016192 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 11999232 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 16717824 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17307648 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 17313792 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 29896704 + } + ], + "md5sum": "88d3eba3ec988210a61f339062eed457" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "2230e24b59fd48363392e27a87cf7aea" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 3145728 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 3151872 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 11016192 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 11999232 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 16717824 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17307648 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 17313792 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 29896704 + } + ], + "md5sum": "cfc09d309063ae2763549b68efa25801" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e12d9abbd9abf3b5d14814102602b367" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 3145728 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 3151872 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 11016192 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 11999232 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 16717824 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17307648 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 17313792 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 29896704 + } + ], + "md5sum": "f167d9a8a0b5b521be424b798e3bba3e" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 31481856, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 3145728 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 3151872 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 11016192 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 11999232 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 16717824 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17307648 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 17313792 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 29896704 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31469568 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31475712 + } + ], + "md5sum": "b8350a849d54f28dba386ec04f60e85d" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "efefaf4739c17931027b85482678836a" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "912302f09dfcdb33422ee1ca4fe55632" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "64a20e412576ad25cc775b9acb406751" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "746d57a96ee03acf6a72b39853317aac" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "03d5b07107c9a325aad9052729809052" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "f3f2675384ed873ea7c3100b2cdcbb27" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "3afed361f4bc03d92fc60723c0a2ba01" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "25df7b6e2aae12ca096922cd8ff48e02" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "5526494f18f6243bc2f2cbd5a53d28b7" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "7c0cf55be3055f7108f471dcd678ce1f" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "cbc8e1289bbfc4e34b8c7bf705994e3a" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "69ebdc997e003d635d0540d02495c8ad" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "bfa31987e09243a2a6f8e2c99f43428b" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 31469568, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 17307648 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 25171968 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 26155008 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 30873600 + }, + { + "name": "model.norm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31463424 + } + ], + "md5sum": "615004e4bdffa99900b1cfbdebf3f219" + } + ] +} \ No newline at end of file