riczhou's picture
Upload folder using huggingface_hub
abd856f verified
{
"metadata": {
"ParamSize": 313,
"ParamBytes": 4760885248.0,
"BitsPerParam": 5.0011817065612245
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 272498688,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
152064,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 272498688,
"byteOffset": 0
}
],
"md5sum": "c6090dc83091739c31db89baa2b599ca"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 34062336,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
152064,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 34062336,
"byteOffset": 0
}
],
"md5sum": "82b34d032af0c5e6c9aba005aea115dc"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 272498688,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
152064,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 272498688,
"byteOffset": 0
}
],
"md5sum": "679a6384deaf5483e97bc93bd7219bd1"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 34062336,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
152064,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 34062336,
"byteOffset": 0
}
],
"md5sum": "b1c56de3d9d14b7dcbdf70de22eef5f0"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "82bd43d2191d1d7f775e659e925c90c7"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "2f8e8ecc9c484d45d85606110466eff6"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "5a1f6a084302004d1295bb43f280be14"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "7d8a7f8bf1f13cd4761945d8765cf128"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 33519616,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 7168
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 4250624
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 12737536
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 12744704
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 12753920
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 21011456
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 22043648
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 28466176
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29268992
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 29276160
}
],
"md5sum": "aea3f8be8b8882db3a4234173ce314ed"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "04c74282a8758f59f645743a88be9200"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "7b2c7bde0041b77d9ff50e67d9e1245c"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 0
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 8486912
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8494080
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 8503296
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 16760832
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 17793024
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 24215552
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 25018368
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 25025536
}
],
"md5sum": "4cfd3f44bda97281fd1c3cf10fcf8e45"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "3b2ae0ec0e37867c1bfbedbee24435ac"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "1bf1ac81a6e1a4ac9da16ae47bd35f50"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 8486912
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8494080
},
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 8503296
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 16760832
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 17793024
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 24215552
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 25018368
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 25025536
}
],
"md5sum": "cbfca11f871b8d6908c9c470659f1f7a"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "ac0033f23ce40dfefdcffa70e184e512"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "ec11fd17531d2529a5cb3eeeccf2f927"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 0
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 8486912
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8494080
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 8503296
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 16760832
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 17793024
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 24215552
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 25018368
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 25025536
}
],
"md5sum": "47df53f8e307fdc0c15f2cc42312b3cf"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "24395ead1cf3097dae04087aa5d4b8b6"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "e452a8a7a8054110859250de8a7e27a2"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 8486912
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8494080
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 8503296
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 16760832
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 17793024
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 24215552
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 25018368
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 25025536
}
],
"md5sum": "1633203b955f26ad6430b4242c1197b6"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "4b7ce5bc5200b33b1592746f437eeb6c"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "83f4e8795a07a1d9af108985f11f283c"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 8486912
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8494080
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 8503296
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 16760832
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 17793024
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 24215552
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 25018368
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 25025536
}
],
"md5sum": "84250ee2c0630f5516b5529b09346467"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "8589be6098ad7b54f31ebecb1868094a"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "21974fcc49c5474983d0bd5abe5ccc81"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 8486912
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8494080
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 8503296
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 16760832
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 17793024
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 24215552
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 25018368
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 25025536
}
],
"md5sum": "909a202ae882521e4419e33095137ccb"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 33285120,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 0
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 8486912
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8494080
},
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 8503296
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 16760832
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 17793024
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 24215552
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 25018368
},
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 25027584
}
],
"md5sum": "7244eaa63d285bdc953c82ec69f07faa"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "fa4d4b0be34fb1e37e280b96e53b4dfd"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "80a66f39d948050ccd128929c9fffd11"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 30301184,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 1032192
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 7454720
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 8257536
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 8264704
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 12508160
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 20995072
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 21002240
},
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 21011456
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 29268992
}
],
"md5sum": "6262b70ac382d94773c7b9b6d7039171"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "5ea3565b77ad3da108feff9531a987d8"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "72c1c6ec7d6bd72f631c78f36adcb6d8"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 6422528
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 7225344
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 7232512
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 11475968
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 19962880
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 19970048
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 19979264
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 28236800
}
],
"md5sum": "52044819a3abc39d5435685c19a8b38e"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "db75f5cc7e59a61e4d79f94be2e23298"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "766e41b13d6ca7c9557d0930c7818089"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 6422528
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 7225344
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 7232512
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 11475968
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 19962880
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 19970048
},
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 19979264
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 28236800
}
],
"md5sum": "416f6a3d6cc85b03626155d088bd71a1"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "6751f1285cd72106f60aa9b7b6110b82"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "e889b30b0db1b4208b396725e718959a"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 6422528
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 7225344
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 7232512
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 11475968
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 19962880
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 19970048
},
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 19979264
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 28236800
}
],
"md5sum": "60786c02d4469b2ffa45850152972c27"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "05f383bad66af9e7f6244015d47714e6"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "02513eabcf5e3c7df185872ffdb12728"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 6422528
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 7225344
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 7232512
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 11475968
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 19962880
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 19970048
},
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 19979264
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 28236800
}
],
"md5sum": "20c600eb174aaa0c923e41d355e0c73d"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "664157c57f68babdf914ba25c45023ed"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "0beb1a0f39683ff6d3b0dc638c3de4d5"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 6422528
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 7225344
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 7232512
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 11475968
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 19962880
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 19970048
},
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 19979264
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 28236800
}
],
"md5sum": "25e6ba4cb4a29fa4c2e12458f3452c80"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "77d7adf991692bed91928ec82ba76118"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "1686b572df8490d42e0564b82652827e"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 6422528
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 7225344
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 7232512
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 11475968
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 19962880
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 19970048
},
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 19979264
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 28236800
}
],
"md5sum": "9109faab401725c140db08f5f2f24bee"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "2b33316f12e77665131045c97eb0e68c"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "81af6f14a38804c55112d79eec781e85"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 6422528
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 7225344
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 7232512
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 11475968
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 19962880
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 19970048
},
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 19979264
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 28236800
}
],
"md5sum": "c806024caba15e2ca18ae3f169983284"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "5ae2b71f595ed38a75c3727523bcbba2"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "842efd93fac58813e357d383818a3298"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 32243712,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 6422528
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 7225344
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 15712256
},
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 15721472
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 23979008
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 25011200
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 31433728
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 32236544
}
],
"md5sum": "dc09b53b68a1011a7e321da67afaa767"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "4ae2a0380be1570ae4775d2ccc38a8c7"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "92d8c5953250ccc97f479ad774caa38f"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "f734f92231c5692ad5ade3976a8f0bc0"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 25491456,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 4243456
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 12730368
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 12737536
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 12744704
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 16988160
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 25475072
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 25482240
}
],
"md5sum": "95850aad3f8247454367ef22cf5da5f6"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "4f658fa6d95c8cc42f5cc891c296745a"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "6ed6fb8accc2ad58ba79c249084af46e"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "541583aeed483d402b784cf675273e6b"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 33526784,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 20765696
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 20772864
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 20780032
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 25023488
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33510400
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33517568
}
],
"md5sum": "6517699a6017a4bb430b424fa617747f"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "86d9844ce5cd69481a2b1ab8370b49c1"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "e8ede02f01d0fef4f1f91d0794960c67"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "d712795bcccdf5fea419f733af345232"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "6fa0a6f255761e22363bbb4ee1e62673"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "0e976a9d9773ead50642c9a402d2462c"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "2122d748b08f21d1ad8fad74d9e2e2f5"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "c4cc1d40edb8514d3bfb15aa22bf70e7"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "a3d64e22d803191e89131dab7f836580"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "ec483f0d90dbd39aad704d8ab03b6d20"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "5fdcab2fb3c578b55d21dea0af2159a4"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "ad9a1f48f5769b1bf629736150d17f33"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "0d70c9a14b0ef82693413d1fe1924a97"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "240abe47a0468f34ed10c513ac7992e8"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "7435ce3c55a4a1e8da2f4e25c693e457"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "6f78e6661a86855a936f4cd0ef9a8231"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "b79215559c8481e02112f7236279a2a5"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "16230ea78968f05c629f4c02f539c5e7"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "a27bffe68189be655531c6c6d65310d3"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "79cddc04361d6708c1004a233330695d"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "742ec7b120da70a4a118215616a2cb1c"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "4c0b5bf830eb8fba0b6c1acfcd578778"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
3584,
2368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "079ac17ada092579742fd5b4c6ecc5d0"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
37888,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "d792a66d96afb2b7953ca3a259d78f37"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 29268992,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16515072
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
3584,
592
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4243456,
"byteOffset": 16522240
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
37888,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8486912,
"byteOffset": 20765696
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29252608
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29259776
}
],
"md5sum": "e315d07f59feca87b63e3b284f5d7ea5"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 16522240,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
4608,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
4608,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1032192,
"byteOffset": 8257536
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
3584,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 9289728
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
3584,
112
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 802816,
"byteOffset": 15712256
},
{
"name": "model.norm.weight",
"shape": [
3584
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 16515072
}
],
"md5sum": "6486277b39cf9301ef64d8c7666fefdc"
}
]
}