diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,13902 @@ +{ + "metadata": { + "ParamSize": 1259, + "ParamBytes": 3086981120.0, + "BitsPerParam": 15.340189136197463 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 27778560, + "records": [ + { + "name": "model.encoder.conv1.weight", + "shape": [ + 1280, + 128, + 3 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 983040, + "byteOffset": 0 + }, + { + "name": "model.encoder.conv1.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 983040 + }, + { + "name": "model.encoder.conv2.weight", + "shape": [ + 1280, + 1280, + 3 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 985600 + }, + { + "name": "model.encoder.conv2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 10816000 + }, + { + "name": "model.encoder.embed_positions.weight", + "shape": [ + 1500, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3840000, + "byteOffset": 10818560 + }, + { + "name": "model.encoder.layers.0.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14658560 + }, + { + "name": "model.encoder.layers.0.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 17935360 + }, + { + "name": "model.encoder.layers.0.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 21212160 + }, + { + "name": "model.encoder.layers.0.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 21214720 + }, + { + "name": "model.encoder.layers.0.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 24491520 + }, + { + "name": "model.encoder.layers.0.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24494080 + }, + { + "name": "model.encoder.layers.0.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 27770880 + }, + { + "name": "model.encoder.layers.0.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 27773440 + }, + { + "name": "model.encoder.layers.0.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 27776000 + } + ], + "md5sum": "d9c39614c369f40fa5d91361fff5a980" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.encoder.layers.0.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.0.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.0.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.0.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.encoder.layers.0.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.0.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.1.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.encoder.layers.1.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.encoder.layers.1.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "6ad6c15993b0725bc6a915da5b77c448" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.encoder.layers.1.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.1.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.1.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.encoder.layers.1.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.1.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.encoder.layers.1.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.encoder.layers.1.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.encoder.layers.1.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.1.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.encoder.layers.1.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.encoder.layers.1.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.encoder.layers.1.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "6cad2078ea90af500d5e2f40e97fd724" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 26237440, + "records": [ + { + "name": "model.encoder.layers.2.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.2.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.2.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.encoder.layers.2.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.2.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.encoder.layers.2.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.encoder.layers.2.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.2.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.2.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.2.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13120000 + }, + { + "name": "model.encoder.layers.2.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26227200 + } + ], + "md5sum": "2cc94b34d529cc603118e9ef505fab0f" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 26234880, + "records": [ + { + "name": "model.encoder.layers.2.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.2.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.2.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.encoder.layers.2.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.3.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.3.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.encoder.layers.3.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.encoder.layers.3.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.3.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.encoder.layers.3.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.encoder.layers.3.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.3.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.3.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + } + ], + "md5sum": "f75a9bdaab48cb1bab0d864ea4809445" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.encoder.layers.3.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.3.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.3.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.3.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.encoder.layers.3.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.3.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.4.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.encoder.layers.4.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.encoder.layers.4.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "6d8fbdfcaa9ecc7da4db2572a058bb8f" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.encoder.layers.4.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.4.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.4.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.encoder.layers.4.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.4.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.encoder.layers.4.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.encoder.layers.4.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.encoder.layers.4.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.4.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.encoder.layers.4.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.encoder.layers.4.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.encoder.layers.4.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "800dce714a51b12eb29967cc1f18a261" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 26237440, + "records": [ + { + "name": "model.encoder.layers.5.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.5.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.5.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.encoder.layers.5.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.5.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.encoder.layers.5.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.encoder.layers.5.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.5.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.5.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.5.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13120000 + }, + { + "name": "model.encoder.layers.5.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26227200 + } + ], + "md5sum": "ecd5316d36c2ee5f1d0bd13518b63a51" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 26234880, + "records": [ + { + "name": "model.encoder.layers.5.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.5.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.5.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.encoder.layers.5.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.6.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.6.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.encoder.layers.6.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.encoder.layers.6.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.6.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.encoder.layers.6.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.encoder.layers.6.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.6.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.6.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + } + ], + "md5sum": "c53c44c5b3a92732a8fcacd014aa27df" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.encoder.layers.6.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.6.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.6.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.6.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.encoder.layers.6.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.6.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.7.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.encoder.layers.7.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.encoder.layers.7.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "b9d9d5c31ce835faf70d9606bbe78ce7" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.encoder.layers.7.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.7.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.7.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.encoder.layers.7.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.7.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.encoder.layers.7.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.encoder.layers.7.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.encoder.layers.7.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.7.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.encoder.layers.7.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.encoder.layers.7.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.encoder.layers.7.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "9a3c372c25c44804cb5fad1ee9513600" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 26237440, + "records": [ + { + "name": "model.encoder.layers.8.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.8.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.8.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.encoder.layers.8.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.8.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.encoder.layers.8.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.encoder.layers.8.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.8.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.8.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.8.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13120000 + }, + { + "name": "model.encoder.layers.8.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26227200 + } + ], + "md5sum": "70687d4e2e299731937e8a299b200d53" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 26234880, + "records": [ + { + "name": "model.encoder.layers.8.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.8.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.8.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.encoder.layers.8.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.9.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.9.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.encoder.layers.9.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.encoder.layers.9.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.9.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.encoder.layers.9.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.encoder.layers.9.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.9.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.9.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + } + ], + "md5sum": "1046e750f8945b90ed1311c01abdc65a" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.encoder.layers.9.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.9.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.9.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.9.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.encoder.layers.9.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.9.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.10.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.encoder.layers.10.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.encoder.layers.10.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "5f0f6571552f549417341c9b733518d3" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.encoder.layers.10.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.10.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.10.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.encoder.layers.10.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.10.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.encoder.layers.10.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.encoder.layers.10.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.encoder.layers.10.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.10.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.encoder.layers.10.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.encoder.layers.10.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.encoder.layers.10.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "232f5a1b071cd30e57c48ef798ac65ac" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 26237440, + "records": [ + { + "name": "model.encoder.layers.11.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.11.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.11.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.encoder.layers.11.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.11.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.encoder.layers.11.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.encoder.layers.11.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.11.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.11.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.11.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13120000 + }, + { + "name": "model.encoder.layers.11.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26227200 + } + ], + "md5sum": "827abf51b707465c609b1749e782f7b7" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 26234880, + "records": [ + { + "name": "model.encoder.layers.11.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.11.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.11.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.encoder.layers.11.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.12.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.12.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.encoder.layers.12.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.encoder.layers.12.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.12.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.encoder.layers.12.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.encoder.layers.12.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.12.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.12.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + } + ], + "md5sum": "ef9aa6e8c4294915e522a164475428e3" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.encoder.layers.12.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.12.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.12.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.12.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.encoder.layers.12.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.12.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.13.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.encoder.layers.13.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.encoder.layers.13.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "33cb59bbae8c74695a79aa6b40784116" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.encoder.layers.13.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.13.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.13.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.encoder.layers.13.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.13.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.encoder.layers.13.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.encoder.layers.13.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.encoder.layers.13.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.13.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.encoder.layers.13.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.encoder.layers.13.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.encoder.layers.13.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "ae253ee08d9cc43c05e89ea13b9c8a9c" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 26237440, + "records": [ + { + "name": "model.encoder.layers.14.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.14.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.14.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.encoder.layers.14.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.14.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.encoder.layers.14.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.encoder.layers.14.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.14.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.14.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.14.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13120000 + }, + { + "name": "model.encoder.layers.14.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26227200 + } + ], + "md5sum": "867d34d19965627d4e67b1ff52328db3" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 26234880, + "records": [ + { + "name": "model.encoder.layers.14.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.14.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.14.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.encoder.layers.14.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.15.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.15.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.encoder.layers.15.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.encoder.layers.15.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.15.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.encoder.layers.15.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.encoder.layers.15.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.15.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.15.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + } + ], + "md5sum": "0722e7aed700446e1322de71c7ea627b" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.encoder.layers.15.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.15.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.15.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.15.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.encoder.layers.15.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.15.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.16.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.encoder.layers.16.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.encoder.layers.16.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "724cd49c6e3f0b59b06bb1e6eb79cd7a" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.encoder.layers.16.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.16.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.16.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.encoder.layers.16.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.16.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.encoder.layers.16.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.encoder.layers.16.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.encoder.layers.16.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.16.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.encoder.layers.16.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.encoder.layers.16.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.encoder.layers.16.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "2ca0cb8a436117896bfc31539ddbb844" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 26237440, + "records": [ + { + "name": "model.encoder.layers.17.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.17.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.17.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.encoder.layers.17.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.17.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.encoder.layers.17.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.encoder.layers.17.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.17.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.17.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.17.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13120000 + }, + { + "name": "model.encoder.layers.17.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26227200 + } + ], + "md5sum": "6db50501724b685960433d38b5e6c0cf" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 26234880, + "records": [ + { + "name": "model.encoder.layers.17.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.17.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.17.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.encoder.layers.17.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.18.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.18.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.encoder.layers.18.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.encoder.layers.18.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.18.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.encoder.layers.18.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.encoder.layers.18.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.18.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.18.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + } + ], + "md5sum": "6021489677857845242886ace75c5fe1" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.encoder.layers.18.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.18.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.18.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.18.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.encoder.layers.18.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.18.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.19.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.encoder.layers.19.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.encoder.layers.19.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "2b7214718a910a1007837722f6c060a9" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.encoder.layers.19.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.19.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.19.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.encoder.layers.19.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.19.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.encoder.layers.19.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.encoder.layers.19.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.encoder.layers.19.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.19.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.encoder.layers.19.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.encoder.layers.19.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.encoder.layers.19.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "c13eaa07fd40073e73ef18e39752f172" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 26237440, + "records": [ + { + "name": "model.encoder.layers.20.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.20.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.20.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.encoder.layers.20.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.20.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.encoder.layers.20.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.encoder.layers.20.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.20.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.20.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.20.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13120000 + }, + { + "name": "model.encoder.layers.20.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26227200 + } + ], + "md5sum": "f3b5a30a71a4037597d004c7353916cc" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 26234880, + "records": [ + { + "name": "model.encoder.layers.20.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.20.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.20.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.encoder.layers.20.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.21.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.21.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.encoder.layers.21.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.encoder.layers.21.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.21.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.encoder.layers.21.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.encoder.layers.21.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.21.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.21.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + } + ], + "md5sum": "d02d8ddcee98ddc073ab41c53dd1c3b6" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.encoder.layers.21.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.21.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.21.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.21.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.encoder.layers.21.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.21.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.22.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.encoder.layers.22.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.encoder.layers.22.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "3dcec1ac6e70b5e50415fcbb961e0d6d" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.encoder.layers.22.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.22.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.22.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.encoder.layers.22.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.22.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.encoder.layers.22.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.encoder.layers.22.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.encoder.layers.22.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.22.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.encoder.layers.22.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.encoder.layers.22.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.encoder.layers.22.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "ac1de8fe0f39900c6f03e35e40848eea" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 26237440, + "records": [ + { + "name": "model.encoder.layers.23.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.23.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.23.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.encoder.layers.23.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.23.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.encoder.layers.23.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.encoder.layers.23.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.23.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.23.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.23.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13120000 + }, + { + "name": "model.encoder.layers.23.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26227200 + } + ], + "md5sum": "7afbdf1c454b3e54368e26b018405455" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 26234880, + "records": [ + { + "name": "model.encoder.layers.23.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.23.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.23.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.encoder.layers.23.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.24.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.24.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.encoder.layers.24.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.encoder.layers.24.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.24.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.encoder.layers.24.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.encoder.layers.24.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.24.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.24.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + } + ], + "md5sum": "9259cf0f5ab20b94909caf938048ce59" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.encoder.layers.24.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.24.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.24.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.24.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.encoder.layers.24.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.24.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.25.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.encoder.layers.25.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.encoder.layers.25.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "1d96645578590d3c9775adc8f1d0d735" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.encoder.layers.25.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.25.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.25.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.encoder.layers.25.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.25.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.encoder.layers.25.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.encoder.layers.25.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.encoder.layers.25.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.25.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.encoder.layers.25.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.encoder.layers.25.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.encoder.layers.25.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "12dcb9b5bdb67881748a240118071581" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 26237440, + "records": [ + { + "name": "model.encoder.layers.26.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.26.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.26.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.encoder.layers.26.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.26.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.encoder.layers.26.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.encoder.layers.26.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.26.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.26.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.26.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13120000 + }, + { + "name": "model.encoder.layers.26.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26227200 + } + ], + "md5sum": "6b13614dbf99a27f2da365fbec0ec0af" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 26234880, + "records": [ + { + "name": "model.encoder.layers.26.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.26.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.26.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.encoder.layers.26.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.27.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.27.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.encoder.layers.27.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.encoder.layers.27.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.27.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.encoder.layers.27.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.encoder.layers.27.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.27.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.27.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + } + ], + "md5sum": "57dd253b32050c0c1c26b6cf317289ae" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.encoder.layers.27.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.27.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.27.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.27.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.encoder.layers.27.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.27.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.28.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.encoder.layers.28.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.encoder.layers.28.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "d1f8da7d674cdbeb5ec2039d2981091d" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.encoder.layers.28.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.28.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.28.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.encoder.layers.28.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.28.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.encoder.layers.28.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.encoder.layers.28.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.encoder.layers.28.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.28.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.encoder.layers.28.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.encoder.layers.28.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.encoder.layers.28.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "cc0f53f30774c3a38bcfab411e9f2d94" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 26237440, + "records": [ + { + "name": "model.encoder.layers.29.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.29.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.29.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.encoder.layers.29.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.29.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.encoder.layers.29.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.encoder.layers.29.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.29.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.29.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.29.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13120000 + }, + { + "name": "model.encoder.layers.29.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26227200 + } + ], + "md5sum": "0700d1a3a2f920a30ebf8edbcdcfc361" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 26234880, + "records": [ + { + "name": "model.encoder.layers.29.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.29.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.29.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.encoder.layers.29.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.encoder.layers.30.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.encoder.layers.30.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.encoder.layers.30.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.encoder.layers.30.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.30.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.encoder.layers.30.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.encoder.layers.30.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.30.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.30.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + } + ], + "md5sum": "5302f184be04d191ee6a54eff8f6b7b7" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.encoder.layers.30.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.30.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.encoder.layers.30.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.encoder.layers.30.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.encoder.layers.30.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.encoder.layers.30.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.encoder.layers.31.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.encoder.layers.31.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.encoder.layers.31.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "738beb16f37be0a45564c29ea23e05b2" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 132776960, + "records": [ + { + "name": "model.decoder.embed_tokens.weight", + "shape": [ + 51866, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 132776960, + "byteOffset": 0 + } + ], + "md5sum": "a9d9ed2d43b66ca36c1860cfc3834797" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 32801280, + "records": [ + { + "name": "model.encoder.layers.31.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.encoder.layers.31.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.encoder.layers.31.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.encoder.layers.31.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.encoder.layers.31.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.encoder.layers.31.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.encoder.layers.31.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.encoder.layers.31.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.encoder.layers.31.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.encoder.layers.31.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.encoder.layers.31.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.encoder.layers.31.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + }, + { + "name": "model.encoder.layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32796160 + }, + { + "name": "model.encoder.layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32798720 + } + ], + "md5sum": "598db2ad37c2287a61e379ae5dbe9a82" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 27386880, + "records": [ + { + "name": "model.decoder.embed_positions.weight", + "shape": [ + 448, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1146880, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.0.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 1146880 + }, + { + "name": "model.decoder.layers.0.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 4423680 + }, + { + "name": "model.decoder.layers.0.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 7700480 + }, + { + "name": "model.decoder.layers.0.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 7703040 + }, + { + "name": "model.decoder.layers.0.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 10979840 + }, + { + "name": "model.decoder.layers.0.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 10982400 + }, + { + "name": "model.decoder.layers.0.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 14259200 + }, + { + "name": "model.decoder.layers.0.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 14261760 + }, + { + "name": "model.decoder.layers.0.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 14264320 + }, + { + "name": "model.decoder.layers.0.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14266880 + }, + { + "name": "model.decoder.layers.0.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 17543680 + }, + { + "name": "model.decoder.layers.0.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 20820480 + }, + { + "name": "model.decoder.layers.0.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 20823040 + }, + { + "name": "model.decoder.layers.0.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 24099840 + }, + { + "name": "model.decoder.layers.0.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 24102400 + }, + { + "name": "model.decoder.layers.0.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 27379200 + }, + { + "name": "model.decoder.layers.0.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 27381760 + }, + { + "name": "model.decoder.layers.0.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 27384320 + } + ], + "md5sum": "300616b31bc4f70cd9c9b9b1d7db30af" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.decoder.layers.0.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.0.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.0.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.0.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.decoder.layers.0.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.0.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.1.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.1.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.decoder.layers.1.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "8b33bc60291f73b24acf2ee130373226" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 32801280, + "records": [ + { + "name": "model.decoder.layers.1.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.1.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.1.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.1.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.1.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.1.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.1.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.1.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9840640 + }, + { + "name": "model.decoder.layers.1.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.1.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.1.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.1.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16399360 + }, + { + "name": "model.decoder.layers.1.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.1.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19678720 + }, + { + "name": "model.decoder.layers.1.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.1.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19683840 + }, + { + "name": "model.decoder.layers.1.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32791040 + } + ], + "md5sum": "a445d6537e820d6a0df37961949da29b" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 32791040, + "records": [ + { + "name": "model.decoder.layers.1.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.1.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.1.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.decoder.layers.1.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.2.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.2.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.decoder.layers.2.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.decoder.layers.2.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.2.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.decoder.layers.2.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.decoder.layers.2.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.2.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.2.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.2.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.2.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29511680 + }, + { + "name": "model.decoder.layers.2.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + } + ], + "md5sum": "e6866029fd87980b2b62ee6ac62699dd" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.decoder.layers.2.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.2.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.2.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.2.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.2.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.2.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.2.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.2.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.2.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.2.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.decoder.layers.2.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.decoder.layers.2.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "932712ec6747d7bc912f54bb676dcdf1" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 26240000, + "records": [ + { + "name": "model.decoder.layers.3.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.3.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.3.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.decoder.layers.3.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.3.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.decoder.layers.3.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.decoder.layers.3.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.3.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.3.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.3.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.3.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.3.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19673600 + }, + { + "name": "model.decoder.layers.3.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.3.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22952960 + }, + { + "name": "model.decoder.layers.3.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22955520 + }, + { + "name": "model.decoder.layers.3.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.3.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.3.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26237440 + } + ], + "md5sum": "8b08e09cb05a2932eafb58709d22448a" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.decoder.layers.3.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.3.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.3.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.3.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.decoder.layers.3.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.3.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.4.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.4.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.decoder.layers.4.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "89940af43031e4275d67b686878e90b4" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 32801280, + "records": [ + { + "name": "model.decoder.layers.4.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.4.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.4.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.4.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.4.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.4.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.4.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.4.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9840640 + }, + { + "name": "model.decoder.layers.4.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.4.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.4.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.4.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16399360 + }, + { + "name": "model.decoder.layers.4.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.4.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19678720 + }, + { + "name": "model.decoder.layers.4.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.4.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19683840 + }, + { + "name": "model.decoder.layers.4.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32791040 + } + ], + "md5sum": "c2667de5a74c0c360a2ff2870647222a" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 32791040, + "records": [ + { + "name": "model.decoder.layers.4.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.4.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.4.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.decoder.layers.4.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.5.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.5.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.decoder.layers.5.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.decoder.layers.5.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.5.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.decoder.layers.5.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.decoder.layers.5.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.5.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.5.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.5.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.5.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29511680 + }, + { + "name": "model.decoder.layers.5.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + } + ], + "md5sum": "4f658c2dbd41f8af715d91ed42f48fde" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.decoder.layers.5.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.5.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.5.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.5.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.5.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.5.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.5.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.5.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.5.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.5.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.decoder.layers.5.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.decoder.layers.5.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "cdc726013fcc2b0ebb19115a38cbd935" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 26240000, + "records": [ + { + "name": "model.decoder.layers.6.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.6.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.6.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.decoder.layers.6.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.6.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.decoder.layers.6.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.decoder.layers.6.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.6.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.6.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.6.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.6.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.6.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19673600 + }, + { + "name": "model.decoder.layers.6.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.6.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22952960 + }, + { + "name": "model.decoder.layers.6.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22955520 + }, + { + "name": "model.decoder.layers.6.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.6.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.6.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26237440 + } + ], + "md5sum": "9483ce32ddc7e9d74aae55ca15fb0b34" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.decoder.layers.6.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.6.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.6.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.6.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.decoder.layers.6.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.6.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.7.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.7.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.decoder.layers.7.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "6e5e7247c995317c79d8760b45227e03" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 32801280, + "records": [ + { + "name": "model.decoder.layers.7.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.7.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.7.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.7.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.7.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.7.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.7.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.7.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9840640 + }, + { + "name": "model.decoder.layers.7.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.7.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.7.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.7.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16399360 + }, + { + "name": "model.decoder.layers.7.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.7.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19678720 + }, + { + "name": "model.decoder.layers.7.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.7.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19683840 + }, + { + "name": "model.decoder.layers.7.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32791040 + } + ], + "md5sum": "050c7a3370fffbfcf21ae4caac80d410" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 32791040, + "records": [ + { + "name": "model.decoder.layers.7.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.7.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.7.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.decoder.layers.7.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.8.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.8.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.decoder.layers.8.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.decoder.layers.8.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.8.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.decoder.layers.8.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.decoder.layers.8.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.8.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.8.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.8.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.8.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29511680 + }, + { + "name": "model.decoder.layers.8.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + } + ], + "md5sum": "c37f97e4fbbabfb18a27ff9f7030bf47" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.decoder.layers.8.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.8.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.8.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.8.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.8.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.8.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.8.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.8.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.8.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.8.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.decoder.layers.8.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.decoder.layers.8.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "720514143786f0de9f8fe6a5b18054db" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 26240000, + "records": [ + { + "name": "model.decoder.layers.9.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.9.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.9.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.decoder.layers.9.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.9.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.decoder.layers.9.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.decoder.layers.9.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.9.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.9.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.9.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.9.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.9.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19673600 + }, + { + "name": "model.decoder.layers.9.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.9.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22952960 + }, + { + "name": "model.decoder.layers.9.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22955520 + }, + { + "name": "model.decoder.layers.9.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.9.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.9.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26237440 + } + ], + "md5sum": "417cbbbff1a115ec20e9ebaf4c300f0c" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.decoder.layers.9.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.9.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.9.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.9.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.decoder.layers.9.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.9.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.10.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.10.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.decoder.layers.10.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "f1f9913d678bdf56b63e466c692df469" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 32801280, + "records": [ + { + "name": "model.decoder.layers.10.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.10.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.10.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.10.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.10.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.10.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.10.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.10.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9840640 + }, + { + "name": "model.decoder.layers.10.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.10.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.10.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.10.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16399360 + }, + { + "name": "model.decoder.layers.10.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.10.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19678720 + }, + { + "name": "model.decoder.layers.10.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.10.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19683840 + }, + { + "name": "model.decoder.layers.10.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32791040 + } + ], + "md5sum": "a93d8a2278a731d19ed60a737db1fb47" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 32791040, + "records": [ + { + "name": "model.decoder.layers.10.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.10.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.10.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.decoder.layers.10.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.11.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.11.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.decoder.layers.11.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.decoder.layers.11.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.11.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.decoder.layers.11.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.decoder.layers.11.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.11.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.11.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.11.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.11.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29511680 + }, + { + "name": "model.decoder.layers.11.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + } + ], + "md5sum": "684334211a2d3fc285e1a2b9974fb85d" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.decoder.layers.11.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.11.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.11.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.11.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.11.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.11.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.11.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.11.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.11.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.11.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.decoder.layers.11.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.decoder.layers.11.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "3d5f8792be5a7e338ef2fd55282ce8bc" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 26240000, + "records": [ + { + "name": "model.decoder.layers.12.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.12.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.12.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.decoder.layers.12.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.12.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.decoder.layers.12.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.decoder.layers.12.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.12.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.12.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.12.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.12.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.12.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19673600 + }, + { + "name": "model.decoder.layers.12.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.12.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22952960 + }, + { + "name": "model.decoder.layers.12.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22955520 + }, + { + "name": "model.decoder.layers.12.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.12.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.12.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26237440 + } + ], + "md5sum": "8b244015460e5469f308b28033a217dc" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.decoder.layers.12.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.12.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.12.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.12.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.decoder.layers.12.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.12.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.13.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.13.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.decoder.layers.13.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "18796ca85dfde359382f825a3c97f159" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 32801280, + "records": [ + { + "name": "model.decoder.layers.13.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.13.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.13.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.13.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.13.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.13.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.13.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.13.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9840640 + }, + { + "name": "model.decoder.layers.13.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.13.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.13.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.13.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16399360 + }, + { + "name": "model.decoder.layers.13.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.13.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19678720 + }, + { + "name": "model.decoder.layers.13.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.13.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19683840 + }, + { + "name": "model.decoder.layers.13.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32791040 + } + ], + "md5sum": "0bf3a88f35cee4db5157a6776c690ed9" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 32791040, + "records": [ + { + "name": "model.decoder.layers.13.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.13.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.13.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.decoder.layers.13.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.14.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.14.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.decoder.layers.14.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.decoder.layers.14.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.14.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.decoder.layers.14.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.decoder.layers.14.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.14.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.14.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.14.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.14.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29511680 + }, + { + "name": "model.decoder.layers.14.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + } + ], + "md5sum": "3603257cac4fef732389f6ece6d7751f" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.decoder.layers.14.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.14.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.14.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.14.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.14.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.14.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.14.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.14.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.14.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.14.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.decoder.layers.14.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.decoder.layers.14.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "e40b18ae72348340a9182cb96e856dc9" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 26240000, + "records": [ + { + "name": "model.decoder.layers.15.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.15.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.15.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.decoder.layers.15.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.15.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.decoder.layers.15.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.decoder.layers.15.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.15.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.15.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.15.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.15.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.15.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19673600 + }, + { + "name": "model.decoder.layers.15.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.15.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22952960 + }, + { + "name": "model.decoder.layers.15.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22955520 + }, + { + "name": "model.decoder.layers.15.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.15.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.15.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26237440 + } + ], + "md5sum": "090d61b068beb02f91d3b31ab1e5ee0c" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.decoder.layers.15.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.15.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.15.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.15.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.decoder.layers.15.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.15.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.16.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.16.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.decoder.layers.16.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "894008067e6fa4893e997e20e64b97b8" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 32801280, + "records": [ + { + "name": "model.decoder.layers.16.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.16.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.16.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.16.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.16.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.16.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.16.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.16.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9840640 + }, + { + "name": "model.decoder.layers.16.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.16.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.16.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.16.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16399360 + }, + { + "name": "model.decoder.layers.16.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.16.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19678720 + }, + { + "name": "model.decoder.layers.16.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.16.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19683840 + }, + { + "name": "model.decoder.layers.16.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32791040 + } + ], + "md5sum": "94536372286f3094c2acfc956d7711db" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 32791040, + "records": [ + { + "name": "model.decoder.layers.16.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.16.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.16.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.decoder.layers.16.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.17.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.17.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.decoder.layers.17.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.decoder.layers.17.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.17.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.decoder.layers.17.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.decoder.layers.17.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.17.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.17.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.17.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.17.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29511680 + }, + { + "name": "model.decoder.layers.17.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + } + ], + "md5sum": "c6ebb453464b38eddc5c5fd18e4be89b" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.decoder.layers.17.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.17.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.17.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.17.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.17.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.17.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.17.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.17.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.17.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.17.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.decoder.layers.17.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.decoder.layers.17.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "07c5b66f9812636a0d1063b88ee9834b" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 26240000, + "records": [ + { + "name": "model.decoder.layers.18.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.18.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.18.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.decoder.layers.18.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.18.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.decoder.layers.18.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.decoder.layers.18.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.18.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.18.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.18.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.18.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.18.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19673600 + }, + { + "name": "model.decoder.layers.18.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.18.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22952960 + }, + { + "name": "model.decoder.layers.18.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22955520 + }, + { + "name": "model.decoder.layers.18.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.18.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.18.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26237440 + } + ], + "md5sum": "90d65223683eaa8c4fefb604f4f71b66" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.decoder.layers.18.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.18.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.18.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.18.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.decoder.layers.18.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.18.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.19.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.19.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.decoder.layers.19.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "5a9a380357798af6ab51a7d846a68ae7" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 32801280, + "records": [ + { + "name": "model.decoder.layers.19.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.19.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.19.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.19.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.19.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.19.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.19.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.19.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9840640 + }, + { + "name": "model.decoder.layers.19.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.19.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.19.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.19.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16399360 + }, + { + "name": "model.decoder.layers.19.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.19.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19678720 + }, + { + "name": "model.decoder.layers.19.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.19.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19683840 + }, + { + "name": "model.decoder.layers.19.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32791040 + } + ], + "md5sum": "c2f4f49cf03fd0bcd4f1baf7826c9627" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 32791040, + "records": [ + { + "name": "model.decoder.layers.19.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.19.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.19.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.decoder.layers.19.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.20.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.20.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.decoder.layers.20.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.decoder.layers.20.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.20.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.decoder.layers.20.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.decoder.layers.20.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.20.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.20.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.20.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.20.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29511680 + }, + { + "name": "model.decoder.layers.20.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + } + ], + "md5sum": "8b5f0c694fedd68d03e51be458d33633" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.decoder.layers.20.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.20.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.20.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.20.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.20.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.20.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.20.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.20.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.20.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.20.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.decoder.layers.20.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.decoder.layers.20.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "dc38ea8d71a5c98efbcbf6a19c05a457" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 26240000, + "records": [ + { + "name": "model.decoder.layers.21.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.21.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.21.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.decoder.layers.21.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.21.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.decoder.layers.21.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.decoder.layers.21.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.21.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.21.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.21.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.21.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.21.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19673600 + }, + { + "name": "model.decoder.layers.21.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.21.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22952960 + }, + { + "name": "model.decoder.layers.21.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22955520 + }, + { + "name": "model.decoder.layers.21.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.21.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.21.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26237440 + } + ], + "md5sum": "5eda43fb42a359d49d919785acb75bfd" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.decoder.layers.21.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.21.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.21.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.21.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.decoder.layers.21.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.21.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.22.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.22.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.decoder.layers.22.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "f79849024eeca1777fabd0e1b6e60e7e" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 32801280, + "records": [ + { + "name": "model.decoder.layers.22.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.22.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.22.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.22.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.22.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.22.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.22.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.22.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9840640 + }, + { + "name": "model.decoder.layers.22.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.22.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.22.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.22.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16399360 + }, + { + "name": "model.decoder.layers.22.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.22.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19678720 + }, + { + "name": "model.decoder.layers.22.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.22.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19683840 + }, + { + "name": "model.decoder.layers.22.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32791040 + } + ], + "md5sum": "23b391c8559a7622a02c9c70d7257547" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 32791040, + "records": [ + { + "name": "model.decoder.layers.22.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.22.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.22.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.decoder.layers.22.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.23.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.23.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.decoder.layers.23.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.decoder.layers.23.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.23.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.decoder.layers.23.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.decoder.layers.23.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.23.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.23.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.23.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.23.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29511680 + }, + { + "name": "model.decoder.layers.23.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + } + ], + "md5sum": "b5a3cea8894cf82473c12ffe483e85ac" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.decoder.layers.23.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.23.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.23.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.23.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.23.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.23.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.23.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.23.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.23.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.23.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.decoder.layers.23.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.decoder.layers.23.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "5cd5a504ca26855575eda72df83ca8c4" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 26240000, + "records": [ + { + "name": "model.decoder.layers.24.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.24.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.24.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.decoder.layers.24.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.24.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.decoder.layers.24.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.decoder.layers.24.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.24.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.24.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.24.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.24.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.24.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19673600 + }, + { + "name": "model.decoder.layers.24.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.24.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22952960 + }, + { + "name": "model.decoder.layers.24.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22955520 + }, + { + "name": "model.decoder.layers.24.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.24.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.24.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26237440 + } + ], + "md5sum": "d2e12273d8da0446d19cfa7149b50f9f" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.decoder.layers.24.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.24.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.24.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.24.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.decoder.layers.24.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.24.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.25.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.25.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.decoder.layers.25.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "19e431783b18bc5eab944097db24e6d7" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 32801280, + "records": [ + { + "name": "model.decoder.layers.25.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.25.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.25.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.25.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.25.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.25.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.25.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.25.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9840640 + }, + { + "name": "model.decoder.layers.25.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.25.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.25.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.25.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16399360 + }, + { + "name": "model.decoder.layers.25.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.25.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19678720 + }, + { + "name": "model.decoder.layers.25.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.25.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19683840 + }, + { + "name": "model.decoder.layers.25.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32791040 + } + ], + "md5sum": "a40ad24f83ffbd83d42afe03209d076c" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 32791040, + "records": [ + { + "name": "model.decoder.layers.25.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.25.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.25.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.decoder.layers.25.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.26.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.26.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.decoder.layers.26.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.decoder.layers.26.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.26.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.decoder.layers.26.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.decoder.layers.26.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.26.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.26.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.26.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.26.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29511680 + }, + { + "name": "model.decoder.layers.26.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + } + ], + "md5sum": "31f1139c03f346a2d7913d3a83b95af5" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.decoder.layers.26.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.26.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.26.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.26.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.26.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.26.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.26.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.26.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.26.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.26.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.decoder.layers.26.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.decoder.layers.26.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "edfa3da2983adacfd350153246596d4a" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 26240000, + "records": [ + { + "name": "model.decoder.layers.27.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.27.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.27.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.decoder.layers.27.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.27.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.decoder.layers.27.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.decoder.layers.27.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.27.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.27.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.27.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.27.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.27.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19673600 + }, + { + "name": "model.decoder.layers.27.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.27.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22952960 + }, + { + "name": "model.decoder.layers.27.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22955520 + }, + { + "name": "model.decoder.layers.27.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.27.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.27.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26237440 + } + ], + "md5sum": "bde65c886566c0836d171f2cd0011bf0" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.decoder.layers.27.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.27.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.27.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.27.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.decoder.layers.27.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.27.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.28.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.28.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.decoder.layers.28.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "bcb7619fc22e4df1bc6dc2738545f346" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 32801280, + "records": [ + { + "name": "model.decoder.layers.28.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.28.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.28.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.28.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.28.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.28.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.28.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.28.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9840640 + }, + { + "name": "model.decoder.layers.28.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.28.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.28.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.28.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16399360 + }, + { + "name": "model.decoder.layers.28.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.28.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19678720 + }, + { + "name": "model.decoder.layers.28.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.28.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19683840 + }, + { + "name": "model.decoder.layers.28.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32791040 + } + ], + "md5sum": "71066aeef89858b3f85b0e4315378643" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 32791040, + "records": [ + { + "name": "model.decoder.layers.28.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.28.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.28.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.decoder.layers.28.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.29.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.29.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16391680 + }, + { + "name": "model.decoder.layers.29.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19668480 + }, + { + "name": "model.decoder.layers.29.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.29.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22947840 + }, + { + "name": "model.decoder.layers.29.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22950400 + }, + { + "name": "model.decoder.layers.29.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.29.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.29.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.29.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.29.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29511680 + }, + { + "name": "model.decoder.layers.29.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + } + ], + "md5sum": "660f4f6aefbc39bd977c43c834e229d9" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 32796160, + "records": [ + { + "name": "model.decoder.layers.29.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.29.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.29.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.29.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.29.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.29.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.29.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.29.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 19671040 + }, + { + "name": "model.decoder.layers.29.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.29.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32788480 + }, + { + "name": "model.decoder.layers.29.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32791040 + }, + { + "name": "model.decoder.layers.29.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32793600 + } + ], + "md5sum": "8f789001dd5fbd4db91b0393845af257" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 26240000, + "records": [ + { + "name": "model.decoder.layers.30.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.30.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.30.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6553600 + }, + { + "name": "model.decoder.layers.30.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.30.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 9832960 + }, + { + "name": "model.decoder.layers.30.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9835520 + }, + { + "name": "model.decoder.layers.30.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layers.30.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layers.30.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.30.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.30.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.30.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19673600 + }, + { + "name": "model.decoder.layers.30.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.30.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 22952960 + }, + { + "name": "model.decoder.layers.30.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22955520 + }, + { + "name": "model.decoder.layers.30.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.30.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26234880 + }, + { + "name": "model.decoder.layers.30.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26237440 + } + ], + "md5sum": "e4e3e18a4cb4660fe6f09800e76c9e25" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 32788480, + "records": [ + { + "name": "model.decoder.layers.30.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.30.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.30.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.30.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26224640 + }, + { + "name": "model.decoder.layers.30.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26227200 + }, + { + "name": "model.decoder.layers.30.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 26229760 + }, + { + "name": "model.decoder.layers.31.self_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26232320 + }, + { + "name": "model.decoder.layers.31.self_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29509120 + }, + { + "name": "model.decoder.layers.31.self_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 32785920 + } + ], + "md5sum": "05c80eef162c6e765ce279754a9408c6" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 32801280, + "records": [ + { + "name": "model.decoder.layers.31.self_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.31.self_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 3276800 + }, + { + "name": "model.decoder.layers.31.self_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 3279360 + }, + { + "name": "model.decoder.layers.31.self_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6556160 + }, + { + "name": "model.decoder.layers.31.self_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6558720 + }, + { + "name": "model.decoder.layers.31.self_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 6561280 + }, + { + "name": "model.decoder.layers.31.encoder_attn.k_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 6563840 + }, + { + "name": "model.decoder.layers.31.encoder_attn.v_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 9840640 + }, + { + "name": "model.decoder.layers.31.encoder_attn.v_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + }, + { + "name": "model.decoder.layers.31.encoder_attn.q_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 13120000 + }, + { + "name": "model.decoder.layers.31.encoder_attn.q_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 16396800 + }, + { + "name": "model.decoder.layers.31.encoder_attn.out_proj.weight", + "shape": [ + 1280, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16399360 + }, + { + "name": "model.decoder.layers.31.encoder_attn.out_proj.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19676160 + }, + { + "name": "model.decoder.layers.31.encoder_attn_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19678720 + }, + { + "name": "model.decoder.layers.31.encoder_attn_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 19681280 + }, + { + "name": "model.decoder.layers.31.fc1.weight", + "shape": [ + 5120, + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19683840 + }, + { + "name": "model.decoder.layers.31.fc1.bias", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32791040 + } + ], + "md5sum": "7c8c2b171f6edf594198119fc930f2e1" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 13120000, + "records": [ + { + "name": "model.decoder.layers.31.fc2.weight", + "shape": [ + 1280, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.decoder.layers.31.fc2.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13107200 + }, + { + "name": "model.decoder.layers.31.final_layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13109760 + }, + { + "name": "model.decoder.layers.31.final_layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13112320 + }, + { + "name": "model.decoder.layer_norm.weight", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13114880 + }, + { + "name": "model.decoder.layer_norm.bias", + "shape": [ + 1280 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2560, + "byteOffset": 13117440 + } + ], + "md5sum": "88e9fb3b0e4823855fd74c5e6b5823c9" + } + ] +} \ No newline at end of file