{ "metadata": { "ParamSize": 267, "ParamBytes": 309011968.0, "BitsPerParam": 5.003910477452378 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 68067328, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 151936, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 68067328, "byteOffset": 0 } ], "md5sum": "a5fa7630a8a1c879d8d8b2011538811a" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 33234176, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 151936, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8508416, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 8508416 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 8510208 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 10689280 }, { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 10961664 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 15319808 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 15864576 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 15866368 }, { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 15868672 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 16384768 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 16449280 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 16850688 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 16900864 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 16902656 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 19081728 }, { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 19354112 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 23712256 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 24257024 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 24258816 }, { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 24261120 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 24777216 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 24841728 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 25243136 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 25293312 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 25295104 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 27474176 }, { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 27746560 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 32104704 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 32649472 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 32651264 }, { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 32653568 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 33169664 } ], "md5sum": "bb8a20c211d35c35ad62f1a825c50bac" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 33505280, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 401408 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 451584 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 453376 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 2632448 }, { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 2904832 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 7262976 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 7807744 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 7809536 }, { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 7811840 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 8327936 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 8392448 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 8793856 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 8844032 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 8845824 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 11024896 }, { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 11297280 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 15655424 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 16200192 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 16201984 }, { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 16204288 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 16720384 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 16784896 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 17186304 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 17236480 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 17238272 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 19417344 }, { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 19689728 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 24047872 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 24592640 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 24594432 }, { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 24596736 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 25112832 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 25177344 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 25578752 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 25628928 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 25630720 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 27809792 }, { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 28082176 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 32440320 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 32985088 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 32986880 }, { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 32989184 } ], "md5sum": "4ac62238322073dc5c0c9b04078db1d3" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33053696, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 64512 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 465920 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 516096 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 517888 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 2696960 }, { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 2969344 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 7327488 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 7872256 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 7874048 }, { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 7876352 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 8392448 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 8456960 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 8858368 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 8908544 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 8910336 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 11089408 }, { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 11361792 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 15719936 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 16264704 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 16266496 }, { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 16268800 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 16784896 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 16849408 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 17250816 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 17300992 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 17302784 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 19481856 }, { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 19754240 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 24112384 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 24657152 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 24658944 }, { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 24661248 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 25177344 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 25241856 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 25643264 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 25693440 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 25695232 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 27874304 }, { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 28146688 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 32504832 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 33049600 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 33051392 } ], "md5sum": "8c3af5dd46a5ef37463597356c924dbb" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33020928, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 516096 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 580608 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 982016 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 1032192 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 1033984 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 3213056 }, { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 3485440 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 7843584 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 8388352 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 8390144 }, { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 8392448 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 8908544 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 8973056 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 9374464 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 9424640 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 9426432 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 11605504 }, { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 11877888 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 16236032 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 16780800 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 16782592 }, { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 16784896 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 17300992 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 17365504 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 17766912 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 17817088 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 17818880 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 19997952 }, { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 20270336 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 24628480 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 25173248 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 25175040 }, { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 25177344 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 25693440 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 25757952 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 26159360 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 26209536 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 26211328 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 28390400 }, { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 28662784 } ], "md5sum": "d6849f4836a56796532408772094efd6" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29211648, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 544768 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 546560 }, { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 548864 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 1064960 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 1129472 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 1530880 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 1581056 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 1582848 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 3761920 }, { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 4034304 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 8392448 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 8937216 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 8939008 }, { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 8941312 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 9457408 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 9521920 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 9923328 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 9973504 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 9975296 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 12154368 }, { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 12426752 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 16784896 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 17329664 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 17331456 }, { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 17333760 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 17849856 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 17914368 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 18315776 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 18365952 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 18367744 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 20546816 }, { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 20819200 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 25177344 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 25722112 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 25723904 }, { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 25726208 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 26242304 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 26306816 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 26708224 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 26758400 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 26760192 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 28939264 } ], "md5sum": "bc5c27170a9aa04e6facc0cf04556ead" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33297408, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 4358144 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 4902912 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 4904704 }, { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 4907008 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 5423104 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 5487616 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 5889024 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 5939200 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 5940992 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 8120064 }, { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 8392448 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 12750592 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 13295360 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 13297152 }, { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 13299456 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 13815552 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 13880064 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 14281472 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 14331648 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 14333440 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 16512512 }, { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 16784896 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 21143040 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 21687808 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 21689600 }, { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 21691904 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 22208000 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 22272512 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 22673920 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 22724096 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 22725888 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 24904960 }, { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 25177344 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 29535488 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 30080256 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 30082048 }, { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 30084352 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 30600448 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 30664960 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 31066368 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 31116544 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 31118336 } ], "md5sum": "f6827611ee989ba488d5db530effe0a6" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 14605824, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 272384 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 4630528 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 5175296 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 5177088 }, { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 5179392 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 5695488 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 5760000 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 6161408 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 6211584 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 896, 608 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2179072, "byteOffset": 6213376 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 896, 152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 272384, "byteOffset": 8392448 }, { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 9728, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4358144, "byteOffset": 8664832 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 9728, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 544768, "byteOffset": 13022976 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 13567744 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 1152 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2304, "byteOffset": 13569536 }, { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 1152, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 516096, "byteOffset": 13571840 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 1152, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 64512, "byteOffset": 14087936 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 896, 112 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 401408, "byteOffset": 14152448 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 896, 28 ], "dtype": "bfloat16", "format": "raw", "nbytes": 50176, "byteOffset": 14553856 }, { "name": "model.norm.weight", "shape": [ 896 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1792, "byteOffset": 14604032 } ], "md5sum": "e28b7d6b836a1bcb649031ea75718dd2" } ] }