|
{ |
|
"metadata": { |
|
"ParamSize": 205, |
|
"ParamBytes": 3671255040.0, |
|
"BitsPerParam": 16.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 262144000, |
|
"records": [ |
|
{ |
|
"name": "lm_head.linear.weight", |
|
"shape": [ |
|
51200, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144000, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f94243661c5a87b38f3bb41b70d6ebf5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b13cdf199fa41a6015318c1e9563b5d3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "75014792943f75c804cb504bf349ad15" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d66bbc9ff5c2c4456695dca08d90d680" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3ab6bceeac17f3a1ebe76be2153ccc33" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cfc960c8f3e40f3f8984b619b578c8f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a1f896b5733dda3e41688a8aa5d48f18" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "86a3e23a912734667d74787760b4cf14" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "913b689959d1363a8fb2ca832deb7fb8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 26480640, |
|
"records": [ |
|
{ |
|
"name": "lm_head.linear.bias", |
|
"shape": [ |
|
51200 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 102400, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "lm_head.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 102400 |
|
}, |
|
{ |
|
"name": "lm_head.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 107520 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 112640 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 117760 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 122880 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 143360 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 148480 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 153600 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 13260800 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13276160 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13281280 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 13286400 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13306880 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13312000 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 13317120 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 26424320 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26439680 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26444800 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 26449920 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26470400 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26475520 |
|
} |
|
], |
|
"md5sum": "f53b583f02b20cdec88c289ac1360c67" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1f2c12270558403bacbf455beac632f2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4f031084eb0520ba0155a8266460af39" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1ec43d399b3b2c7a786a58842d7cc747" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8d260f0b99188c3d223ab16d48d1c2bc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e2c988cf74aeb2ade3a874cb7c29d95c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5397e400be97efddd4b1e13600a0b70c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 26327040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13122560 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13127680 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 13132800 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13153280 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13158400 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 13163520 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 26270720 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26286080 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26291200 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 26296320 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26316800 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26321920 |
|
} |
|
], |
|
"md5sum": "f8a2d03403eb9e9cf7b9aa78181106b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8f835d1be1a32e017fa3d6140eb46b44" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eb1b239145a55d4c181f83ef0fed1959" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1a2ebc4b8383975be62cb644e326db04" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4dde6f985823ec281ab0ce5d2c2f4164" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 262144000, |
|
"records": [ |
|
{ |
|
"name": "transformer.embd.weight", |
|
"shape": [ |
|
51200, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144000, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8e80be24cbe7c82aa854016c3950729e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3cf77e681ad8fd29e0ce784823198e16" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e20e12c7a3dca65da2f968dc73087129" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 26327040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13122560 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13127680 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 13132800 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13153280 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13158400 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 13163520 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 26270720 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26286080 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26291200 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 26296320 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26316800 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26321920 |
|
} |
|
], |
|
"md5sum": "9ac07740b35a8e13617dd0e709cb0d69" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "686131149665cc509ab60acbda61725a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7f924e11f5a49c3bdc4a18eaf7c49d5c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "25acdddd77a14c825123007c8a8c0f37" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8686cab0ddf07e2d37ac73ffc4eaed45" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "364369ce587148d62299cfe8a72b8770" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "04cfa745317707b5a3054a4a2c7e5616" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 26327040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13122560 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13127680 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 13132800 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13153280 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13158400 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 13163520 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 26270720 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26286080 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26291200 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 26296320 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26316800 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26321920 |
|
} |
|
], |
|
"md5sum": "f9ecd7131d396059066f14b928914dcf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7d90087370a2350e7ab1dacc145fe966" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bba2afa39105559b6abbd88707220870" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "13bca4c465f23d066f8166398373c9fe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0d47f892b7c1e0ee1a3573d62a4e3dce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d3e5a7817c7bf9c81ed43f7cd2e1a82d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b457f920d75e816966f9c05eefa2beb9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 26327040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13122560 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13127680 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 13132800 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13153280 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13158400 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 13163520 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 26270720 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26286080 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26291200 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 26296320 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26316800 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26321920 |
|
} |
|
], |
|
"md5sum": "f232fe0a5e8b69aaf51f286cf9fd1f43" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5837435a6f54fce26919f36840d2564e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6f5c0f70857a2dc5c3a0e3ed0d8593f3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3663f5adfd843eac31a20342a6e4f578" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "66549463b0931dc08db16a65ebfcef58" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5dc9124d2ddfca57b608a85e630c1d05" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "508d927411c8164c5c61e311480ad498" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 26327040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13122560 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13127680 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 13132800 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13153280 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13158400 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 13163520 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 26270720 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26286080 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26291200 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 26296320 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26316800 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26321920 |
|
} |
|
], |
|
"md5sum": "30e056b33c457e301b54e9028e4e3b7c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "02183b8cfd0913fb062d44f6dc7bb435" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c19fd2b41ca5d52e9b1f56210ba8d3d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa7ef0fe99061caae303a69488f71e76" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "03886d58a911935275e3ade03254936c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "992123d9456fe73af283b58198c3a8fe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f1ee0816f479dfd634dcfab091e095d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 26327040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13122560 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13127680 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 13132800 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13153280 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13158400 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 13163520 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 26270720 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26286080 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26291200 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 26296320 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26316800 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26321920 |
|
} |
|
], |
|
"md5sum": "aca564b8e952b26db4b32d881c11d348" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a9530456770c3619145219284cdb6a82" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "beb5db238e6ed35d95b08a85bd9e01e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "00a4fa5a70deb767650b614fb10c8036" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2ad9d1721f84c5f31dff3d7d7a7a96c7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "09e347d5895f73bc819e435b579362b4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b5ab7ebe94255ed18aa03fcae28f918d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 26327040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13122560 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13127680 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 13132800 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13153280 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13158400 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 13163520 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 26270720 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26286080 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26291200 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 26296320 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26316800 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26321920 |
|
} |
|
], |
|
"md5sum": "4ff6ed9e8c6244542ed1e0412969f707" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aeceb9bec9d4f99791bab976eda24623" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e28a069a898f94610d4f52f03a70a132" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eaeef94a51cf1d83f8d5132defd7af77" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b11cc2d4cb9dfdf49fc21a6d5ebfeaef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a511be3be0bf4534f77641d65119eafa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "44343e3e55fe12e42a0fe3ce85be4edd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 26327040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13122560 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13127680 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 13132800 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13153280 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13158400 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 13163520 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 26270720 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26286080 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26291200 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 26296320 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26316800 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 26321920 |
|
} |
|
], |
|
"md5sum": "00162d75939eb0330d894bb26f8c5685" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a8da9fa04ff9fbb632e404e55eb99acc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mlp.fc1.weight", |
|
"shape": [ |
|
10240, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "12e1356b7f7960e0b2005397eff796cd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mlp.fc2.weight", |
|
"shape": [ |
|
2560, |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dd5efaa435d1a33f296c59529b249f7c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 39321600, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mixer.Wqkv.weight", |
|
"shape": [ |
|
7680, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 39321600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4eecc2ff6bbef80cc85ff61963f44e9d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 26286080, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13122560 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln.weight", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13127680 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.fc1.bias", |
|
"shape": [ |
|
10240 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 20480, |
|
"byteOffset": 13132800 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.fc2.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13153280 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mixer.out_proj.bias", |
|
"shape": [ |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5120, |
|
"byteOffset": 13158400 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mixer.out_proj.weight", |
|
"shape": [ |
|
2560, |
|
2560 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 13163520 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mixer.Wqkv.bias", |
|
"shape": [ |
|
7680 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 15360, |
|
"byteOffset": 26270720 |
|
} |
|
], |
|
"md5sum": "7768f3a73f324517de18e1b8b7417da3" |
|
} |
|
] |
|
} |