|
{ |
|
"metadata": { |
|
"ParamSize": 485, |
|
"ParamBytes": 12415229952.0, |
|
"BitsPerParam": 5.00081018149736 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "00284ef2b463215cfc463010f321b10f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3b629921bccee1f0d0fc4a28c3738425" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21233664 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24379392 |
|
} |
|
], |
|
"md5sum": "527f54d68f1391943cd99ba6f0766995" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b852c980e98d1be149c8f25702e5edd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "594cc542e523dd583dd0f4b4d5433018" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "72f358e79b8ce3ad26cbff27d21c110d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "567766e2e606b1504409bc05ce25d986" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.0.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "5f6af20d76b092e2eae713d51d0b2316" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8e0ee16703bdc83a3cc171acb83ece87" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8c19ae1b402ca35373381209750ac083" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ea35bfabe8ebe4ce81194879887850d3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4984b3a8a93f30161828ddcdf8c4726c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.1.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "e0c94f7240178fdb1665d8b03d267700" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3a973332765dd568c26ceac7ea6a0e7b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b70e1be8b131f50c6018dec9516dc17a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ebe5a55987f64f40370cf5a583dd38d6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "50d46afa2c2b532d0a4c37bbb6be9236" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "89b43a668dc71a4c594f041b76a1f08a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "72c1c5a128357d38e90668edb2268f7a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29908992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.10.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 24391680 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 26750976 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 29896704 |
|
} |
|
], |
|
"md5sum": "6078e600ba0de2bc58adab9d7e918f49" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ac3c4d8201252cb948e38c0e8871ac2f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9c69a111df96a3fc79aea18b618a916c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e9c01a19332e2adb930db683e530c91b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5479be3e34bd3a18e63ebe8977c0a65e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.2.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "6eb8bda82655d3df6c83bb20e4713205" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "95c51f5f4fc1b7fcb98398b65b7c5d4a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b84b0a3d48c305df2d7a5c22bbd4f20" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f24d75e5c16f85904f8c8c29b57481f2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5e15002174167919e1eda921c51fba38" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.3.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "b4deb97fd05b39bbffa1c05a7343217e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "26e160d3918d689f35da649de61c7230" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "349eb6d5eed5399e5e1afa68877e889f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "160c6446975d5ceaa6372e00f250db8b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "25558c41244691ea68e5e1cd29484abd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.4.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "55629761943083ef1bdadb84097efc38" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "802e9ddc7310e14dd8664de74c12e6bd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8ca404a2c8262e6448e729724bf99ae3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "22ce86177afd884ee20dae0bebc6c106" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "86f1c3c9305d018f5132a7e790a4d7cf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.5.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "6abbaee8380fba70014b21eb525acd62" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bf41cd47c2e835e2958c022cd42ee082" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "83fdea8e34ce86e90af2c0723109482d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "44a2698b1f84e642c12209710a00c86c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ec1a742c6dd4bfb50a52719bcbe4a6f2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.6.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "f11f38d92a05c8e64f353a2c8b1ef96b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "330b2c9435023374d015775f9631c0eb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a9d75c8ee5f764d31903e266462838b8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d72ca566fddb54ead1fcdff8b7b53d36" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "43fe8e433180a84ed4add127feb489e0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.7.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "1bcaabd831c6df949af2a2c99aa391d6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f7eeaed1e2fd0d650d3adbbc3d70d62f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2850456422dafdf6a3ad21583b83ea94" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "06d0e7a00ed7d447049d328ebbc27a56" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eff8f28665fb3d94927a4d690128db69" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.8.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "10d45a0386cd768f1a38bc9c99118bf4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8f50250f4d015cebf33c51e550a6a558" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 284295168, |
|
"records": [ |
|
{ |
|
"name": "model.tok_embeddings.q_weight", |
|
"shape": [ |
|
92544, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 284295168, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2dd9e7b5161e20e4d85dee74ae980199" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35536896, |
|
"records": [ |
|
{ |
|
"name": "model.tok_embeddings.q_scale", |
|
"shape": [ |
|
92544, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 35536896, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "966f48b841b8d678e03e9806709d63b5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "74af130ac4fb9d8caf5af3fbd80ca161" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4dbe32af3028d2ab8ed626982b3f3a7b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31481856, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.9.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.11.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 18898944 |
|
} |
|
], |
|
"md5sum": "437c70592dbff13eb274e84dc5d475f7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "103bd4315c0d5e8b1f6e02608ce71648" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "96ad9f6264c30961b8bb9d8322ff028e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30695424, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 6291456 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 6303744 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 25178112 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 27537408 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 30683136 |
|
} |
|
], |
|
"md5sum": "ac0bb20f0cf8e930e7c25bf5476bf21b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b1f5ecb6ca96f6c06f4ed28fbf03facd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5788364a5bc1e426c0092ba506ac88ae" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "476632fb1c35b3ffb77d627532bc03f7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "758524bcd105b05f63a5d71827b66f26" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.12.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "379cd71d453310281511d95f270432c3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b103cb0357b37228f103c6c5699916d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bd26f1a288dd8d21399fc51e72729e8d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c649c1685be04409e8bbdc07011fa517" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "de8b0d86bf134e76ceea123cf3f14d8e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.13.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "a9a9a45b14236a23be37135a86032e5d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9f2b7ecb18020990fd511a92ff38d340" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "108d512f0cf4571de9606b20b69dde22" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e1c7bbe39bfe6f291da451e622c81313" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cf1911680f3663ef1f8d93bf95d37d22" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.14.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "7dbefcf306cf30966fdeaaa6c94ec116" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "733df901831944e32ccf825060952406" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4a20b7ea01b63d1ca1b2fa6421a1a989" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "77dde9b3f4c9684e5483377fd769b1ec" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5d7a7756a94ea7bc8b9947d232e5024d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.15.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "ab796d825f8dc85dab90b1b00d7aa872" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c532bfe59af4beb0f13aa56cbe83a2de" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ccb30288735bf955a97f4766013dfe37" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0e60341ce11dc8ba45ea124c3a000292" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0cc2e84cdb8c75cb866a0e18cd872df5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.16.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "36b48b2a9c1c14b98b37d468c6cde1b8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9431304c21d6bd745182e0bb9e9c1c0a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7258e48041c8a95042ba5e36f8419057" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "83788a9066bdff886784a464d6cf8b50" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1f65a4aa2a217b8e01ca0e4a3393ec01" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.17.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "bb84c0a28cee4cec53007b013706031c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7f0b46d5af70c4e5e051921e8575dae9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d34443174a1d161d011b75c4622320a1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5f587454560db68552c91c80e8a2167e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cc41aac634822122e1b812cf09b4fd3b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.18.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "bd879f531d67097cacc6f66e6013bc6a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4159726292aab97f45285779b4cbec30" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "43c5d41337bff9653e51d5a779fbb65a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c3691a094aa91939df7dec2371809d94" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f50853b2db64dff9649305daddf299e6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.19.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "076edf9a22f10c86d2e0aa1134480f0a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "314871849addd0914a61470d09c00d3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "61ea95ad988deab855b1eeecf346fda1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "63987ea2ffccc21c68679cf17435f48b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2b17fc37892c02d03b97eef6f4dc561b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.20.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "90688941650e245560cefb7e210f76fe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "821d9bb73d369855fa894f78f35aefee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2a11fe75c93f2ddae9902abbac674422" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dd20a56a827c8f7e860cb11b28cd2b7d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2447c5f423bded8419a81e9991f572f3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.21.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "6eae2ad8eee54be7f63bde100fb62231" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5b24f426958c6b855f6f95e14d9be139" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c9cc3230c3c027b10e80dff08adf0a78" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "29eba1a47dc1cf87f59b0fade436b543" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f678b6d832024e53015816cc5ec51072" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.22.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "5b7189b7f51a5b171674a76a1d75f691" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "508ca62f0bcfcfc8280bcbc2494c32ae" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "de1db8eb1512766e7fa21d2cdfacdb70" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6a955051ad2bfd366a96d81b9c560ab5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ea0c959128e9ee2cc6c32a746b8ff09e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.23.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "8622abdc949e2d88b54cafc655de47ce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f434f5e3991c9c9c9882af18be749fc4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ec461d88c4fc6a212eca45c40e65cbab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e68d544dab45df74293e70ef03f944da" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "87a9d5e7eaae4e259628990035d8fa41" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.24.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "e3402b08f43ad28de2dd036b76aac55c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa4fa4a1c0cdd8bb3959d247b3caa895" |
|
}, |
|
{ |
|
"dataPath": "params_shard_130.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3c997ac7ad8859dd5662cc4fa65e1f59" |
|
}, |
|
{ |
|
"dataPath": "params_shard_131.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "98808696bd17e3f4877f94ae57de66c6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_132.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cf1f5dc59f4545d3e3c7533da0985c35" |
|
}, |
|
{ |
|
"dataPath": "params_shard_133.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.25.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "35ba152aed7d162d2fd0905c55dfda71" |
|
}, |
|
{ |
|
"dataPath": "params_shard_134.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "76bbf8feb812602034633b441c4c1bdd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_135.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cd146a5eaddc8cf1ffdd72605a2dbed9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_136.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "76c6d0263361b79ffe484451392e2922" |
|
}, |
|
{ |
|
"dataPath": "params_shard_137.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4f79215a5d111ace561aba58dc90e569" |
|
}, |
|
{ |
|
"dataPath": "params_shard_138.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.26.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "6e975e2691c59495fc857fcacdc73b28" |
|
}, |
|
{ |
|
"dataPath": "params_shard_139.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fae53a3d7c2d52f056ae6f33822cabdd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_140.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "531d3a4c2b5e041de61c91eec2d3722a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_141.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cf46185cbf1b28ee4a03134026850c60" |
|
}, |
|
{ |
|
"dataPath": "params_shard_142.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f862e7fdf14a772a2da0102aaa8bf143" |
|
}, |
|
{ |
|
"dataPath": "params_shard_143.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.27.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "9db0f23fddffbc788735d8d0aa57640b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_144.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "df553356a969c3a737f939eb62054831" |
|
}, |
|
{ |
|
"dataPath": "params_shard_145.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "14eb5b5bcdd58ae0c9dd47e74dcbe759" |
|
}, |
|
{ |
|
"dataPath": "params_shard_146.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9f612732a5cb843c4a9379968982dbf7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_147.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f433aa0734aa232dbc0cc812baf6daeb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_148.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.28.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.28.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "5ec13f3c8e9ad06b997ec4ee954d7d97" |
|
}, |
|
{ |
|
"dataPath": "params_shard_149.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b55d6c5b4e3cbb41768a50882e51ab3d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_150.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "96d71035e4dfaa23cf672fcf4b3148ad" |
|
}, |
|
{ |
|
"dataPath": "params_shard_151.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "281351522900e1ed0f106ddc5ffa26e8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_152.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "726e8a7f835904c85aad54423c7d3a40" |
|
}, |
|
{ |
|
"dataPath": "params_shard_153.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.29.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "254c3ebe5fe7dcc4e5dd795067317b82" |
|
}, |
|
{ |
|
"dataPath": "params_shard_154.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6ef2ba1ad4d1453c184fb1c7319dd9c7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_155.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d0c9317ba5f9aaa4416fe68e45ded617" |
|
}, |
|
{ |
|
"dataPath": "params_shard_156.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a477bd5fcdd32c040bc0dbae50b93df5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_157.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c79299422f56a51832f8f360150dd81e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_158.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.30.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "807d4a9725b140f5bf95136c137ad4a4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_159.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ef1ed74e341ae11b4b4a3ddc66eb15d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_160.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "89b29e5f3381e40d07d146cb9086257d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_161.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1c8a97f5ae3599ec917df53f247c32e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_162.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8e0b40b7c224f8d23e2bbd05feed1afa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_163.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.31.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.32.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.32.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.32.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "7eff74f813c073ff0fd6b021f80ff545" |
|
}, |
|
{ |
|
"dataPath": "params_shard_164.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "03bdb41601a66ec7c43350268194c94a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_165.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f852fcb989a6edd819f494af113e36f6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_166.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e0acce77d092085f687864e55bc5a359" |
|
}, |
|
{ |
|
"dataPath": "params_shard_167.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eea08e28000c1cfe5410d6e7dfa7b1df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_168.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.32.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.32.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.33.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.33.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.33.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "12def825c7928f5e1179ee5647ce8415" |
|
}, |
|
{ |
|
"dataPath": "params_shard_169.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "14bd5dc653fdc418cbc7224efd42c0e6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_170.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "75c058b004e1529ac64ac26e2d34eb9a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_171.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "33695918d34627539c8ce45c7d48a898" |
|
}, |
|
{ |
|
"dataPath": "params_shard_172.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "628a63573d1bac6ef42b55f38b0d1940" |
|
}, |
|
{ |
|
"dataPath": "params_shard_173.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.33.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.33.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.34.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.34.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.34.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "f31fb49e035888712b297475c52864b2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_174.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d2eb378861bc29d98bafface5ab066db" |
|
}, |
|
{ |
|
"dataPath": "params_shard_175.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "31503682e00d6487b61f43a21b25bf8f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_176.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "567fbde9ba586cbd71a37fb9b6af50df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_177.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d2eedaadc51dc8692cf212161cbef3a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_178.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.34.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.34.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.35.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.35.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.35.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "69b518ec54f6677fcd919c9253b0614e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_179.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3983774f8fc3f7c182befdaf92183fcf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_180.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cfc73b0d1f4316f639b608ba09c60df7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_181.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ef8bb46de2ff642420174496ff8eb9e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_182.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3705cfbda7b10118d52053e4bcf5b53b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_183.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.35.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.35.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.36.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.36.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "60d7d3b3348e08fa93bfc93a92843e35" |
|
}, |
|
{ |
|
"dataPath": "params_shard_184.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dea2c5c8e8f65bae170d749819af3b84" |
|
}, |
|
{ |
|
"dataPath": "params_shard_185.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f8abc7a7c35584caee36d6ca34d41d5e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_186.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1c5f3d415b4decb3a935aadf5f74f6a8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_187.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "234b8f8209919d4e786b83e0a48f1a53" |
|
}, |
|
{ |
|
"dataPath": "params_shard_188.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.36.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.36.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12595200 |
|
}, |
|
{ |
|
"name": "model.layers.36.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.37.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.37.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.37.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "f9a0414ec40d9bf8560cde85f764a729" |
|
}, |
|
{ |
|
"dataPath": "params_shard_189.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "caadc9898fa702e45d7a8066d6fc034b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_190.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "daca59f2befcbf65ca4f22132e1f5968" |
|
}, |
|
{ |
|
"dataPath": "params_shard_191.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a64ed9dfed562df2a47360291349c392" |
|
}, |
|
{ |
|
"dataPath": "params_shard_192.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "16560fdb4bf490a9f510a99d03ae7f65" |
|
}, |
|
{ |
|
"dataPath": "params_shard_193.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.37.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.37.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.38.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.38.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.38.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "e9776f6d848093cd8ae111d82c5470f0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_194.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f83f4681d5135acfefeaed331013a245" |
|
}, |
|
{ |
|
"dataPath": "params_shard_195.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "79262a1b152d0c6dac9b81bd23c4b732" |
|
}, |
|
{ |
|
"dataPath": "params_shard_196.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "494adcfe5d06195f184c89b9641b7b02" |
|
}, |
|
{ |
|
"dataPath": "params_shard_197.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f0a0694e12495696d67af7793e52814" |
|
}, |
|
{ |
|
"dataPath": "params_shard_198.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.38.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.38.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.39.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.39.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.39.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "b6fb3b57ef9e289ee317fad962a27274" |
|
}, |
|
{ |
|
"dataPath": "params_shard_199.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ef20c9cd3ace2631ee57f473491ee84f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_200.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "00ba24af7ddee4fd0df90052c80006f6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_201.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dddc0f57281ddfe8175f7f7de227bc4f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_202.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0e8af89ea7ad941c3495e95cd4946df5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_203.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.39.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.39.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.40.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.40.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.40.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "cb2b6d03e66660fdea2851e6d6242f62" |
|
}, |
|
{ |
|
"dataPath": "params_shard_204.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2ad00ee80dea62083d12480dff27142f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_205.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8bbdf2b4613d4d2492b52721519e0330" |
|
}, |
|
{ |
|
"dataPath": "params_shard_206.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5853bdf385a2cf95cbb546d8a60f52e6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_207.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "86b8a754afa6b098fd4da8c5adf2bbde" |
|
}, |
|
{ |
|
"dataPath": "params_shard_208.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.40.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.40.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.41.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.41.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.41.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "8072f10fc32633f7ef13f12284581bfd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_209.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dce2c7f9765e1bf49f7cffa2f44d9980" |
|
}, |
|
{ |
|
"dataPath": "params_shard_210.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "38f4c827a9da73a327a03a672fdb3864" |
|
}, |
|
{ |
|
"dataPath": "params_shard_211.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2ac7f290ce44fd7fc72ac4b663e35c17" |
|
}, |
|
{ |
|
"dataPath": "params_shard_212.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b1a74c2b552d5f35279c762744e5dfe8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_213.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.41.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.41.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.42.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.42.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.42.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "1bb78ce0fedee9d4f4685ecd4bbc0baf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_214.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aac7ba81c61f2aed1b363f37c9ff9b1d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_215.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6ae3315184976b9bb3dbf4c97a8eb7a1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_216.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "26b7bf85fe6a4013122193df0e1dbc6c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_217.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "80d536ada8406f0655c2895a1700fac8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_218.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.42.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.42.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.43.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.43.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.43.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "d3445886e6570c376d9b9a53f0b1e822" |
|
}, |
|
{ |
|
"dataPath": "params_shard_219.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8f90ce24a00118439f3ccb44a0e25280" |
|
}, |
|
{ |
|
"dataPath": "params_shard_220.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "00000f7db76c8343c8e1fc3ac220a7ac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_221.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "116acd05f0ea0602a2b1294c7862986b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_222.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a712ed019202c2f1ad61188e5a22eb4c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_223.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.43.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.43.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.44.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.44.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.44.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "3aba4040778ad5da5bb7bfdb11c30159" |
|
}, |
|
{ |
|
"dataPath": "params_shard_224.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2efc16c7b850c4a57d64f37b0e0d26d0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_225.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e9cf7537c22a75aaf83b658a94d08bb0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_226.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4889b75762a9e488476c7ec782ac2756" |
|
}, |
|
{ |
|
"dataPath": "params_shard_227.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e27715da69d6d827ed0489a1b24c7479" |
|
}, |
|
{ |
|
"dataPath": "params_shard_228.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.44.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.44.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.45.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.45.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.45.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "e057dbb01489145de195eb4f848fb468" |
|
}, |
|
{ |
|
"dataPath": "params_shard_229.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "73c091f28524a94856dc84ee54dc8765" |
|
}, |
|
{ |
|
"dataPath": "params_shard_230.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a098d1d326bd7e64488b230a35914a4b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_231.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5ed98589f051eb6185235ebddd8f8bef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_232.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e62a203a74743c9f2b17d31a82f2ca7e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_233.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.45.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.45.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.46.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.46.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.46.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "1ddffa454715f1f5cc29c7c4047d7bc0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_234.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f7803532c3933b7164ea4818c95477c4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_235.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3d3343c87f3a48d5f8402b2fc7332c73" |
|
}, |
|
{ |
|
"dataPath": "params_shard_236.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7765eac572da6689c28cfbb5bdcbe598" |
|
}, |
|
{ |
|
"dataPath": "params_shard_237.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "de22eb28ca84a24c4c00f94cbd2a6440" |
|
}, |
|
{ |
|
"dataPath": "params_shard_238.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.46.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.46.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.47.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.47.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.47.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "641f2888aa1d9ea5626656dd0fd7f957" |
|
}, |
|
{ |
|
"dataPath": "params_shard_239.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4d8b731f9e9505ec082f59f37f594e19" |
|
}, |
|
{ |
|
"dataPath": "params_shard_240.bin", |
|
"format": "raw-shard", |
|
"nbytes": 284295168, |
|
"records": [ |
|
{ |
|
"name": "output.q_weight", |
|
"shape": [ |
|
92544, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 284295168, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d30f9fe98d213ebed349055a203f27fa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_241.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35536896, |
|
"records": [ |
|
{ |
|
"name": "output.q_scale", |
|
"shape": [ |
|
92544, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 35536896, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee3e0373996e5f8361443ebfc937e79e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_242.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18898944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.47.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.47.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
} |
|
], |
|
"md5sum": "fb90ffc3790a2dcfd1d93382925673d3" |
|
} |
|
] |
|
} |