CharlieFRuan's picture
Upload folder using huggingface_hub
37da85d verified
{
"metadata": {
"ParamSize": 254,
"ParamBytes": 6171877376.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 622329856,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
151936,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 622329856,
"byteOffset": 0
}
],
"md5sum": "f5a393b8d9552a89cfca11fc481c720c"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "3d540cf5c2a2ab7213680dfd5e0ed5c6"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "361f7aa8156f8de998b7c1c9d5bd5b44"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "2d08c7387aef840ea287f19307e7e47b"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "68ec79d0607d550144ddf62b3b97375c"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 29386752,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 4096
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8192
},
{
"name": "model.layers.0.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 13312
},
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10499072
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18887680
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18891776
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18895872
},
{
"name": "model.layers.1.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18900992
}
],
"md5sum": "be1d43fb36dfcd1d840cdfa9a64e6504"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "55845a8df01f10bdbe20c22630c86bd0"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "0c3aa0df157dd77f7964f8b850b6cef8"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "756a3536f62270dced8069c7bb6887c3"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "e5c0162e746af3fd7cad0c0fc3561670"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.10.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "8b8daee7d14172301305932ad932ce1e"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "9dba88d01c9223b0db3bacdfe891cba1"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "5a208a69509e8269ebdf70084003622f"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.12.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "79d89ac47f3bad07ebb1d8224723eb16"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "2b91f85cc366d0b5dc4f54faf4e9bc86"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "5be766448b63c3e23c78615fa32f47ef"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "29920b1d0a1597968c159f0910c1333e"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "ca01d78b7ca8cd14bf111d442effafa1"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.13.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "fb7bb6dd3ebc588cb4b1a289c4c8783f"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "48eb23dd76b7d7625884ba3d22dd2e29"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "3265f74bd740a3d379c1beba3be1c301"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.15.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "0d8055315e319a625b41e089524004a9"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "21bc3c7e04f0a5958960748e3abd2043"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "fdbb2860750fd623c46338922d26bb99"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "7a64f2831f888838b56e94afa95e157b"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "b733a9aff8df2ea1183a7012cc2157a2"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.16.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "17f0b9c174f901e4f6b0782bc3576a76"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "7c398287c9e650b6e4425ee5f879e843"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "d80df83bd3abbaad494b4683e14c1af8"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.18.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "911be85da7cad89831e35d199384ca48"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "498854d30f912cf609e23b94793a49ed"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "8ea96b32450d8aa8d6550e026ce5768c"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "e3d9fc23b0a006e3e7a0493bafccba68"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "3b2c917b2d2ec4c67ffbb24435542764"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.19.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "8ce54448e11d50e8a1e4ad85b4e4fcbc"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "05dc8adc403fad378247a9b7d42b9483"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "5e189ac5c1971a80d1551a6b58b025df"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.20.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "cf17dac10f62814bcf7e0372f88c5c0b"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "5566cf9480e904a8ba342139821f1d61"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "dbee37031e5074f467abfc580c0aa593"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "dff0fae26d5069ed2becc87fe944a7fd"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "0e015e559f95df6775bb50f225cff253"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.21.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "64c3faedd4c8a1803cfa825e2fd65306"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "cc1fa4af006e2bc42b6541a39ac16097"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "a42b5d97993cb7c161b2fc0ba52935af"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.23.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "8df062b02e81c931831f35ca311181a3"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.24.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "7627c959b574c2fa6af666e523453983"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "ca23489d548bb01dd5b71e1cd8bf26d9"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.25.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "4b1631cc9366d4391eee8ae046c847a8"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "f94e7f38b028d59098c2da3553198922"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.24.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "9c55a1ffd5079da4b28e87005c6c5b09"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.26.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "ccb81925165a6b3792db857b0dd5f513"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "f9ced110c94a6c01589856e9aa9fbb5c"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.26.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "4afdf4c7edafd83e9f39d21258fd8ba3"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.27.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "bf3e2c92aed55b70193a7e072ca44423"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "b717f1178878b6a4dc39b7feeb498e2a"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 27281408,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.27.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.27.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.28.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27276288
}
],
"md5sum": "b0063c03062c2e398d846b744ed6cfbc"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "e287a99cb9eb77da12b94a7846c5e973"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "82e3c9ec032752a2dccf34fced07f15a"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.28.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.3.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "1675a7024c1e55381fc4edfd09f753cd"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "f41be16ad3c5df95d89ae0ce03d73fc1"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "4c6c01f318c1efb7db0c26f73e2b187a"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "4e4f72bf6c1e54461887736061f461df"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "2f62dbbea1d3d863c9187285ac371ef5"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.4.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "9edb92a2ad554aca529ec8498b9fd8b1"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "5614ae3d2723d66fa8f16752d4418ba8"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "e5cc2ae2437366f1cfa87c4705a45a0c"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.6.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "a1deaf6c656c4356f3b9f009a8dfd9b2"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "7c8c05324c6b564e0a71e9a1e9ecfbdd"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "809efda0b47a07bce998f3c3119f6dc2"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "24b65732b756c3dabdec06dd9947f167"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "01d3577dffc5c3dc054c71930d111f63"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.7.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "42688c734ce901d899f6a390fa2bf77a"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "1ed8bf2bc28ad779f7017838474b86ba"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "ff0bfebb0ded1a73e4966a63e781a574"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.9.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "42973493bf448c27b25bb30eea7ddfcf"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.28.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "7e0e6d1c53944bf098e4ee3af3027da5"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "3ae79ca042b61e0ef058d72f76b0a1a6"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.29.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "cd5e45e71312882f65df34d95a6a7313"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "82648cad80e633408986e34866739509"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.30.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "f06e275b8e7d5a2d571758eaa3225d47"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "a771f5f3b024af75ce664962278e389d"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 27297792,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8396800
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8400896
},
{
"name": "model.layers.29.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8404992
},
{
"name": "model.layers.29.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8410112
},
{
"name": "model.layers.29.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18895872
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27284480
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27288576
},
{
"name": "model.layers.30.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27292672
}
],
"md5sum": "0122d938098b616e5d69f6c52145abd9"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.31.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "f72a16f5cfdf1cfdf68de32ddb1eb798"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "d1cc12a1b42ebf767dc7c22696f861bd"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.30.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.31.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.31.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "a13f1cae037b4ee8054843fe183645ac"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.32.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "adcd7cf7020b85f9c86499d30fe934a5"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "5876feb63dada54c1118feb9c74c2137"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.33.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "5fed0f8818c16354f9a62093defa785e"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "711be6b5383eb8e9313725756910095b"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 27289600,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.32.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.32.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.32.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27280384
},
{
"name": "model.layers.33.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 27284480
}
],
"md5sum": "42e90febe664713da9115d1cb4f6b77c"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.34.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "13aebf4637ace6020f16b1445ea93c7d"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "890cc1b14a7b7b7cb3fb164e009caa75"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 29373440,
"records": [
{
"name": "model.layers.33.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.34.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18882560
},
{
"name": "model.layers.34.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18887680
}
],
"md5sum": "89441a1212527c15edaa3c5831868667"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.35.mlp.down_proj.weight",
"shape": [
2048,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "bf913875190550465485e861f02f475e"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.weight",
"shape": [
22016,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "58b0420021e0cad377d493e2b839bfdf"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 27280384,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392704
},
{
"name": "model.layers.35.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 8396800
},
{
"name": "model.layers.35.self_attn.c_attn.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 8401920
},
{
"name": "model.layers.35.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 18887680
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27276288
}
],
"md5sum": "9d4274380d0c8f511ef6b04bd0c14051"
}
]
}