diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4311 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 4140998656.0, + "BitsPerParam": 4.070120983102826 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 262668288, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 128256, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 262668288, + "byteOffset": 0 + } + ], + "md5sum": "322e9cb9ad2f221fc0457007eb472e25" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cc5224947ef910187fdd00af79d00b34" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "3ebe21dd1b0613ac8c7d45945881f18e" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 262668288, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 128256, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 262668288, + "byteOffset": 0 + } + ], + "md5sum": "746988d55bddfb4042023efdd48a2b8a" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e3922518d1b5aa74de9daff9745743ce" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "56fd863a49bbeb63a354fa7c8b09060f" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 21962752, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 128256, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8208384, + "byteOffset": 0 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8208384 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 8216576 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 9134080 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10969088 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10977280 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 128256, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8208384, + "byteOffset": 10985472 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 19193856 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 19202048 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 20119552 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21954560 + } + ], + "md5sum": "1e91be3a1a42e388a365906a92ed92f5" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "66644d60c82adb1c76ba4ba6af931756" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "a6d4759a48803975f9e294a2be3c0b05" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "944c8ac0cd3442bb4f3a009fcbf1ad71" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "aef1345c072ceeef11d9908108b194dc" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "7d2d78390f65905b1ad4aba8605fb0f6" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "13f11c39d0419c62c9c550adcdc23c28" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e3ebaf4e77e94034032007489e05e9eb" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "c8adabf4f94029899e64953d8153d4e8" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "7694fe9b9969b686dc9a0958830df1c5" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "805861b546341e277b984dcd349b222a" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "8c8c0cdfdae4b78dfae5ac301401a518" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "64970f5ba38655edd1a34a2d59ee8b86" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "70b7d0409e79963e85c6c4d00ebfa544" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d3eca0924eb1d682a962f4f4e0780790" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "4cce2cbb360c48908c89afe6d5521b88" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3421d9246e1a3ef369ce4d4e788a953b" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "376571f8747badcfd2e24475bc75f5d1" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "cdb63ebda58e6f8a5329762357d33e49" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a87f91d706bb35397f6df9151a2f798a" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "52923c082fa5b882f2c37cc7645c5e4a" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "be7537b85bef222f9b76c45994c60ebd" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "db809ad13edd81b6ab397a8f181667fa" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "cd4e578961f50e162b666f7aa6901140" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 24379392, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21626880 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22544384 + } + ], + "md5sum": "d9c8d0e17122841b8adb6b9c32941950" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5de4095e978a1841d7427f632f259771" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "cdbd8048d2e0c8ca892c2f762e07005b" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "a116274a092073b6080def33be36cef5" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "60886d7e318b0b89dae9f89bbd78f054" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "6e7b7fc4c1e9cc4ec8dc134e15c79db8" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "5d31ce2688ba6caa487509aa467581ff" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "dfc16503b9fa13b770cf014dda8cbcdf" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "30768609886b8de4864d3b09eefa89cd" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "e12106253f5f470fabd359bb282a9996" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "12eedb3807b11f18a7eac0f216575b9a" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "fedbb79ecca543d1dfc75ea37fbcf8ab" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "97271e667a46bba8952702873833029f" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "77e9776b85490b44a05693bad8ddfc1d" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "6ab31c1f7aa9145aeb3649a06825ef15" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "de7e3d2b17b3ed9c125e422d245b6947" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "66fd5c0fe7ff6e027d5d487f9c522c18" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "f2e11dc64ee553371eec5bee4f23489b" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "bc8de0e0330842444a07bcc10772986f" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5f2fc1d6efba7540ad72c1ccff1999ad" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "c1c9c25c834f66721cf211750c70f0f8" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "b485d330bad2386b9e6b0f2070c7fb30" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f052a32d9f2ca21bc60a600f646666bd" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "aa8867c795bbb82258b4cba1b19b41c4" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "c6da1b93911eae094216010f53b692fc" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "339b5714d7d97552b90c97a540bebecc" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "9e1662df5ed024f0a2cac3f2ccdb687e" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "c9b24983b6f46da263c9fe68bc76b2f7" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d8273bba73327764bee207186b916989" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "74805d242b98d191c14e9b0ce528d005" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "a86d4b83341df07f7a15b4ed1190a9a7" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6fd80347c4ce853e42a69ac862955e0b" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "b418e64e7a319b83f0facd051b3480df" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 24412160, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21635072 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21643264 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21651456 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22568960 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24403968 + } + ], + "md5sum": "7e602ae58ce54fe82eb44c6c06809524" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ac0f3ef2d59b834307100c2ce62f6aa5" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "0a72ec7f4824f760f4f93267aa9ac4d2" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "eca71404edc106eca71702fcdb1846a2" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d2b14a23c6a99dcd1135a6afa6635673" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "83d56926a5418a5b0912c6e339836f8a" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "7191970af97b07a651766561d4d35325" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8505105f28517b1aa7e7c6b44ed1fb4a" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d03599fbdb83879fd6df34b153856cfe" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "780df52b01c27de76f0e4146bd37dbac" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b90ab5e3c8d1986a945cb4c6c2f9b6f5" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "11fd12e95dd08958e4d2583b75e534dd" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "91f8958a028df26068d43cad77c3bb44" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e0ab1182f7aa306abc6d1796224fb7b3" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "bb85f061c5de412f1946102cb8849491" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "c6ce5b2cf1e96fbf35edc49145f1985e" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "26b29eb357d551f9e2ed0ee065af7a46" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "88f1f09e6207129a4a469f662e02f4ee" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "2d3da3eb913c22d1a8de686b8b29b408" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d5768484d5737ea82836edf0a98bef9b" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "3217e0f511197f59dfb52b0c5e68a03c" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "d9358061859fe2eeaa4cb38b7f651588" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "48a914ef1e623fa520b38c5a813a9a64" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "5a2f3db32f46a0207cc5afcb6767e7b4" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "075a7734bdeb89afeb5122c6016f215b" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0c1c4ffa61b435e1f268fc8a8c1aa102" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "edffa4961715461484d625c5e38b4df1" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "db84684302cd81ed79ea0e05650cf6ef" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cab9f31f5d7b606c77219887244e0c27" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "9019b14260c5d6a1874a00c10c295f0d" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "b1009edff42594d254046daa5ba256aa" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cf4152ad212bbb19fe922845c548d782" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "893b18120c579067e004fc357a70724d" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "e97011637ce8edb9e413ac1f0962bdd4" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 21626880, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + } + ], + "md5sum": "af9b0f4314dfb1903eb42aa1c76e5639" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 21626880, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + } + ], + "md5sum": "cccdd8aa07da8c626788a96ebf58e445" + } + ] +} \ No newline at end of file