phi-2-q0f16-MLC / ndarray-cache.json
geonmin-kim's picture
Upload folder using huggingface_hub
9537a09 verified
raw
history blame
90.1 kB
{
"metadata": {
"ParamSize": 205,
"ParamBytes": 3671255040.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 262144000,
"records": [
{
"name": "lm_head.linear.weight",
"shape": [
51200,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144000,
"byteOffset": 0
}
],
"md5sum": "f94243661c5a87b38f3bb41b70d6ebf5"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.14.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b13cdf199fa41a6015318c1e9563b5d3"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.14.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "75014792943f75c804cb504bf349ad15"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.14.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "d66bbc9ff5c2c4456695dca08d90d680"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.15.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "3ab6bceeac17f3a1ebe76be2153ccc33"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.15.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "cfc960c8f3e40f3f8984b619b578c8f4"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.15.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "a1f896b5733dda3e41688a8aa5d48f18"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.16.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "86a3e23a912734667d74787760b4cf14"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.16.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "913b689959d1363a8fb2ca832deb7fb8"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 26480640,
"records": [
{
"name": "lm_head.linear.bias",
"shape": [
51200
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 102400,
"byteOffset": 0
},
{
"name": "lm_head.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 102400
},
{
"name": "lm_head.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 107520
},
{
"name": "transformer.h.14.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 112640
},
{
"name": "transformer.h.14.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 117760
},
{
"name": "transformer.h.14.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 122880
},
{
"name": "transformer.h.14.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 143360
},
{
"name": "transformer.h.14.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 148480
},
{
"name": "transformer.h.14.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 153600
},
{
"name": "transformer.h.14.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 13260800
},
{
"name": "transformer.h.15.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13276160
},
{
"name": "transformer.h.15.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13281280
},
{
"name": "transformer.h.15.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 13286400
},
{
"name": "transformer.h.15.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13306880
},
{
"name": "transformer.h.15.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13312000
},
{
"name": "transformer.h.15.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13317120
},
{
"name": "transformer.h.15.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26424320
},
{
"name": "transformer.h.16.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26439680
},
{
"name": "transformer.h.16.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26444800
},
{
"name": "transformer.h.16.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 26449920
},
{
"name": "transformer.h.16.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26470400
},
{
"name": "transformer.h.16.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26475520
}
],
"md5sum": "f53b583f02b20cdec88c289ac1360c67"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.16.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "1f2c12270558403bacbf455beac632f2"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.17.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "4f031084eb0520ba0155a8266460af39"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.17.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1ec43d399b3b2c7a786a58842d7cc747"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.17.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "8d260f0b99188c3d223ab16d48d1c2bc"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.18.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e2c988cf74aeb2ade3a874cb7c29d95c"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.18.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "5397e400be97efddd4b1e13600a0b70c"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 26327040,
"records": [
{
"name": "transformer.h.16.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "transformer.h.16.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 13107200
},
{
"name": "transformer.h.17.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "transformer.h.17.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13127680
},
{
"name": "transformer.h.17.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 13132800
},
{
"name": "transformer.h.17.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13153280
},
{
"name": "transformer.h.17.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13158400
},
{
"name": "transformer.h.17.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13163520
},
{
"name": "transformer.h.17.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26270720
},
{
"name": "transformer.h.18.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26286080
},
{
"name": "transformer.h.18.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26291200
},
{
"name": "transformer.h.18.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 26296320
},
{
"name": "transformer.h.18.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26316800
},
{
"name": "transformer.h.18.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26321920
}
],
"md5sum": "f8a2d03403eb9e9cf7b9aa78181106b7"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.18.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "8f835d1be1a32e017fa3d6140eb46b44"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.19.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "eb1b239145a55d4c181f83ef0fed1959"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.19.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1a2ebc4b8383975be62cb644e326db04"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.19.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "4dde6f985823ec281ab0ce5d2c2f4164"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 262144000,
"records": [
{
"name": "transformer.embd.weight",
"shape": [
51200,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144000,
"byteOffset": 0
}
],
"md5sum": "8e80be24cbe7c82aa854016c3950729e"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.0.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "3cf77e681ad8fd29e0ce784823198e16"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.0.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e20e12c7a3dca65da2f968dc73087129"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 26327040,
"records": [
{
"name": "transformer.h.18.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "transformer.h.18.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 13107200
},
{
"name": "transformer.h.19.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "transformer.h.19.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13127680
},
{
"name": "transformer.h.19.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 13132800
},
{
"name": "transformer.h.19.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13153280
},
{
"name": "transformer.h.19.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13158400
},
{
"name": "transformer.h.19.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13163520
},
{
"name": "transformer.h.19.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26270720
},
{
"name": "transformer.h.0.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26286080
},
{
"name": "transformer.h.0.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26291200
},
{
"name": "transformer.h.0.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 26296320
},
{
"name": "transformer.h.0.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26316800
},
{
"name": "transformer.h.0.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26321920
}
],
"md5sum": "9ac07740b35a8e13617dd0e709cb0d69"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.0.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "686131149665cc509ab60acbda61725a"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.1.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "7f924e11f5a49c3bdc4a18eaf7c49d5c"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.1.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "25acdddd77a14c825123007c8a8c0f37"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.1.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "8686cab0ddf07e2d37ac73ffc4eaed45"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.10.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "364369ce587148d62299cfe8a72b8770"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.10.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "04cfa745317707b5a3054a4a2c7e5616"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 26327040,
"records": [
{
"name": "transformer.h.0.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "transformer.h.0.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 13107200
},
{
"name": "transformer.h.1.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "transformer.h.1.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13127680
},
{
"name": "transformer.h.1.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 13132800
},
{
"name": "transformer.h.1.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13153280
},
{
"name": "transformer.h.1.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13158400
},
{
"name": "transformer.h.1.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13163520
},
{
"name": "transformer.h.1.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26270720
},
{
"name": "transformer.h.10.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26286080
},
{
"name": "transformer.h.10.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26291200
},
{
"name": "transformer.h.10.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 26296320
},
{
"name": "transformer.h.10.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26316800
},
{
"name": "transformer.h.10.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26321920
}
],
"md5sum": "f9ecd7131d396059066f14b928914dcf"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.10.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "7d90087370a2350e7ab1dacc145fe966"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.11.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "bba2afa39105559b6abbd88707220870"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.11.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "13bca4c465f23d066f8166398373c9fe"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.11.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "0d47f892b7c1e0ee1a3573d62a4e3dce"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.12.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "d3e5a7817c7bf9c81ed43f7cd2e1a82d"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.12.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b457f920d75e816966f9c05eefa2beb9"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 26327040,
"records": [
{
"name": "transformer.h.10.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "transformer.h.10.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 13107200
},
{
"name": "transformer.h.11.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "transformer.h.11.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13127680
},
{
"name": "transformer.h.11.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 13132800
},
{
"name": "transformer.h.11.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13153280
},
{
"name": "transformer.h.11.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13158400
},
{
"name": "transformer.h.11.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13163520
},
{
"name": "transformer.h.11.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26270720
},
{
"name": "transformer.h.12.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26286080
},
{
"name": "transformer.h.12.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26291200
},
{
"name": "transformer.h.12.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 26296320
},
{
"name": "transformer.h.12.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26316800
},
{
"name": "transformer.h.12.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26321920
}
],
"md5sum": "f232fe0a5e8b69aaf51f286cf9fd1f43"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.12.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "5837435a6f54fce26919f36840d2564e"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.13.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "6f5c0f70857a2dc5c3a0e3ed0d8593f3"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.13.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "3663f5adfd843eac31a20342a6e4f578"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.13.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "66549463b0931dc08db16a65ebfcef58"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.2.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "5dc9124d2ddfca57b608a85e630c1d05"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.2.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "508d927411c8164c5c61e311480ad498"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 26327040,
"records": [
{
"name": "transformer.h.12.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "transformer.h.12.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 13107200
},
{
"name": "transformer.h.13.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "transformer.h.13.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13127680
},
{
"name": "transformer.h.13.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 13132800
},
{
"name": "transformer.h.13.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13153280
},
{
"name": "transformer.h.13.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13158400
},
{
"name": "transformer.h.13.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13163520
},
{
"name": "transformer.h.13.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26270720
},
{
"name": "transformer.h.2.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26286080
},
{
"name": "transformer.h.2.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26291200
},
{
"name": "transformer.h.2.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 26296320
},
{
"name": "transformer.h.2.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26316800
},
{
"name": "transformer.h.2.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26321920
}
],
"md5sum": "30e056b33c457e301b54e9028e4e3b7c"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.2.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "02183b8cfd0913fb062d44f6dc7bb435"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.3.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c19fd2b41ca5d52e9b1f56210ba8d3d8"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.3.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "aa7ef0fe99061caae303a69488f71e76"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.3.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "03886d58a911935275e3ade03254936c"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.4.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "992123d9456fe73af283b58198c3a8fe"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.4.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "2f1ee0816f479dfd634dcfab091e095d"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 26327040,
"records": [
{
"name": "transformer.h.2.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "transformer.h.2.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 13107200
},
{
"name": "transformer.h.3.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "transformer.h.3.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13127680
},
{
"name": "transformer.h.3.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 13132800
},
{
"name": "transformer.h.3.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13153280
},
{
"name": "transformer.h.3.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13158400
},
{
"name": "transformer.h.3.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13163520
},
{
"name": "transformer.h.3.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26270720
},
{
"name": "transformer.h.4.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26286080
},
{
"name": "transformer.h.4.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26291200
},
{
"name": "transformer.h.4.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 26296320
},
{
"name": "transformer.h.4.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26316800
},
{
"name": "transformer.h.4.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26321920
}
],
"md5sum": "aca564b8e952b26db4b32d881c11d348"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.4.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "a9530456770c3619145219284cdb6a82"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.5.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "beb5db238e6ed35d95b08a85bd9e01e9"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.5.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "00a4fa5a70deb767650b614fb10c8036"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.5.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "2ad9d1721f84c5f31dff3d7d7a7a96c7"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.6.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "09e347d5895f73bc819e435b579362b4"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.6.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b5ab7ebe94255ed18aa03fcae28f918d"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 26327040,
"records": [
{
"name": "transformer.h.4.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "transformer.h.4.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 13107200
},
{
"name": "transformer.h.5.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "transformer.h.5.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13127680
},
{
"name": "transformer.h.5.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 13132800
},
{
"name": "transformer.h.5.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13153280
},
{
"name": "transformer.h.5.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13158400
},
{
"name": "transformer.h.5.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13163520
},
{
"name": "transformer.h.5.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26270720
},
{
"name": "transformer.h.6.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26286080
},
{
"name": "transformer.h.6.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26291200
},
{
"name": "transformer.h.6.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 26296320
},
{
"name": "transformer.h.6.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26316800
},
{
"name": "transformer.h.6.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26321920
}
],
"md5sum": "4ff6ed9e8c6244542ed1e0412969f707"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.6.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "aeceb9bec9d4f99791bab976eda24623"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.7.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e28a069a898f94610d4f52f03a70a132"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.7.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "eaeef94a51cf1d83f8d5132defd7af77"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.7.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "b11cc2d4cb9dfdf49fc21a6d5ebfeaef"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.8.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "a511be3be0bf4534f77641d65119eafa"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.8.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "44343e3e55fe12e42a0fe3ce85be4edd"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 26327040,
"records": [
{
"name": "transformer.h.6.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "transformer.h.6.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 13107200
},
{
"name": "transformer.h.7.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "transformer.h.7.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13127680
},
{
"name": "transformer.h.7.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 13132800
},
{
"name": "transformer.h.7.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13153280
},
{
"name": "transformer.h.7.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13158400
},
{
"name": "transformer.h.7.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13163520
},
{
"name": "transformer.h.7.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26270720
},
{
"name": "transformer.h.8.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26286080
},
{
"name": "transformer.h.8.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26291200
},
{
"name": "transformer.h.8.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 26296320
},
{
"name": "transformer.h.8.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26316800
},
{
"name": "transformer.h.8.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 26321920
}
],
"md5sum": "00162d75939eb0330d894bb26f8c5685"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.8.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "a8da9fa04ff9fbb632e404e55eb99acc"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.9.mlp.fc1.weight",
"shape": [
10240,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "12e1356b7f7960e0b2005397eff796cd"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "transformer.h.9.mlp.fc2.weight",
"shape": [
2560,
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "dd5efaa435d1a33f296c59529b249f7c"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.9.mixer.Wqkv.weight",
"shape": [
7680,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "4eecc2ff6bbef80cc85ff61963f44e9d"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 26286080,
"records": [
{
"name": "transformer.h.8.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "transformer.h.8.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 13107200
},
{
"name": "transformer.h.9.ln.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13122560
},
{
"name": "transformer.h.9.ln.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13127680
},
{
"name": "transformer.h.9.mlp.fc1.bias",
"shape": [
10240
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20480,
"byteOffset": 13132800
},
{
"name": "transformer.h.9.mlp.fc2.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13153280
},
{
"name": "transformer.h.9.mixer.out_proj.bias",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 13158400
},
{
"name": "transformer.h.9.mixer.out_proj.weight",
"shape": [
2560,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13163520
},
{
"name": "transformer.h.9.mixer.Wqkv.bias",
"shape": [
7680
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 15360,
"byteOffset": 26270720
}
],
"md5sum": "7768f3a73f324517de18e1b8b7417da3"
}
]
}