{ "metadata": { "ParamSize": 1259, "ParamBytes": 3086981120.0, "BitsPerParam": 15.340189136197463 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 27778560, "records": [ { "name": "model.encoder.conv1.weight", "shape": [ 1280, 128, 3 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 983040, "byteOffset": 0 }, { "name": "model.encoder.conv1.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 983040 }, { "name": "model.encoder.conv2.weight", "shape": [ 1280, 1280, 3 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 985600 }, { "name": "model.encoder.conv2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 10816000 }, { "name": "model.encoder.embed_positions.weight", "shape": [ 1500, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3840000, "byteOffset": 10818560 }, { "name": "model.encoder.layers.0.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14658560 }, { "name": "model.encoder.layers.0.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 17935360 }, { "name": "model.encoder.layers.0.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 21212160 }, { "name": "model.encoder.layers.0.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 21214720 }, { "name": "model.encoder.layers.0.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 24491520 }, { "name": "model.encoder.layers.0.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24494080 }, { "name": "model.encoder.layers.0.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 27770880 }, { "name": "model.encoder.layers.0.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 27773440 }, { "name": "model.encoder.layers.0.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 27776000 } ], "md5sum": "d9c39614c369f40fa5d91361fff5a980" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.encoder.layers.0.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.0.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.encoder.layers.0.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.encoder.layers.0.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.encoder.layers.0.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.0.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.1.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.encoder.layers.1.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.encoder.layers.1.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "6ad6c15993b0725bc6a915da5b77c448" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.encoder.layers.1.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.1.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.encoder.layers.1.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.encoder.layers.1.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.encoder.layers.1.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.encoder.layers.1.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.encoder.layers.1.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.encoder.layers.1.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.encoder.layers.1.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.encoder.layers.1.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.encoder.layers.1.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.encoder.layers.1.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "6cad2078ea90af500d5e2f40e97fd724" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 26237440, "records": [ { "name": "model.encoder.layers.2.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.2.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.encoder.layers.2.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.encoder.layers.2.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.encoder.layers.2.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.encoder.layers.2.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.encoder.layers.2.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.2.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.encoder.layers.2.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.encoder.layers.2.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13120000 }, { "name": "model.encoder.layers.2.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26227200 } ], "md5sum": "2cc94b34d529cc603118e9ef505fab0f" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 26234880, "records": [ { "name": "model.encoder.layers.2.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.2.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.encoder.layers.2.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.encoder.layers.2.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.3.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.encoder.layers.3.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.encoder.layers.3.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.encoder.layers.3.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.encoder.layers.3.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.encoder.layers.3.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.encoder.layers.3.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.3.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.3.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 } ], "md5sum": "f75a9bdaab48cb1bab0d864ea4809445" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.encoder.layers.3.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.3.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.encoder.layers.3.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.encoder.layers.3.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.encoder.layers.3.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.3.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.4.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.encoder.layers.4.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.encoder.layers.4.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "6d8fbdfcaa9ecc7da4db2572a058bb8f" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.encoder.layers.4.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.4.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.encoder.layers.4.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.encoder.layers.4.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.encoder.layers.4.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.encoder.layers.4.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.encoder.layers.4.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.encoder.layers.4.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.encoder.layers.4.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.encoder.layers.4.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.encoder.layers.4.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.encoder.layers.4.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "800dce714a51b12eb29967cc1f18a261" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 26237440, "records": [ { "name": "model.encoder.layers.5.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.5.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.encoder.layers.5.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.encoder.layers.5.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.encoder.layers.5.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.encoder.layers.5.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.encoder.layers.5.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.5.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.encoder.layers.5.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.encoder.layers.5.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13120000 }, { "name": "model.encoder.layers.5.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26227200 } ], "md5sum": "ecd5316d36c2ee5f1d0bd13518b63a51" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 26234880, "records": [ { "name": "model.encoder.layers.5.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.5.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.encoder.layers.5.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.encoder.layers.5.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.6.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.encoder.layers.6.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.encoder.layers.6.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.encoder.layers.6.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.encoder.layers.6.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.encoder.layers.6.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.encoder.layers.6.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.6.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.6.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 } ], "md5sum": "c53c44c5b3a92732a8fcacd014aa27df" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.encoder.layers.6.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.6.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.encoder.layers.6.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.encoder.layers.6.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.encoder.layers.6.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.6.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.7.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.encoder.layers.7.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.encoder.layers.7.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "b9d9d5c31ce835faf70d9606bbe78ce7" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.encoder.layers.7.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.7.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.encoder.layers.7.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.encoder.layers.7.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.encoder.layers.7.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.encoder.layers.7.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.encoder.layers.7.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.encoder.layers.7.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.encoder.layers.7.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.encoder.layers.7.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.encoder.layers.7.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.encoder.layers.7.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "9a3c372c25c44804cb5fad1ee9513600" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 26237440, "records": [ { "name": "model.encoder.layers.8.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.8.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.encoder.layers.8.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.encoder.layers.8.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.encoder.layers.8.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.encoder.layers.8.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.encoder.layers.8.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.8.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.encoder.layers.8.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.encoder.layers.8.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13120000 }, { "name": "model.encoder.layers.8.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26227200 } ], "md5sum": "70687d4e2e299731937e8a299b200d53" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 26234880, "records": [ { "name": "model.encoder.layers.8.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.8.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.encoder.layers.8.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.encoder.layers.8.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.9.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.encoder.layers.9.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.encoder.layers.9.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.encoder.layers.9.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.encoder.layers.9.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.encoder.layers.9.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.encoder.layers.9.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.9.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.9.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 } ], "md5sum": "1046e750f8945b90ed1311c01abdc65a" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.encoder.layers.9.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.9.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.encoder.layers.9.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.encoder.layers.9.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.encoder.layers.9.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.9.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.10.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.encoder.layers.10.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.encoder.layers.10.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "5f0f6571552f549417341c9b733518d3" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.encoder.layers.10.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.10.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.encoder.layers.10.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.encoder.layers.10.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.encoder.layers.10.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.encoder.layers.10.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.encoder.layers.10.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.encoder.layers.10.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.encoder.layers.10.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.encoder.layers.10.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.encoder.layers.10.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.encoder.layers.10.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "232f5a1b071cd30e57c48ef798ac65ac" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 26237440, "records": [ { "name": "model.encoder.layers.11.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.11.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.encoder.layers.11.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.encoder.layers.11.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.encoder.layers.11.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.encoder.layers.11.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.encoder.layers.11.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.11.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.encoder.layers.11.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.encoder.layers.11.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13120000 }, { "name": "model.encoder.layers.11.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26227200 } ], "md5sum": "827abf51b707465c609b1749e782f7b7" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 26234880, "records": [ { "name": "model.encoder.layers.11.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.11.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.encoder.layers.11.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.encoder.layers.11.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.12.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.encoder.layers.12.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.encoder.layers.12.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.encoder.layers.12.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.encoder.layers.12.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.encoder.layers.12.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.encoder.layers.12.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.12.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.12.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 } ], "md5sum": "ef9aa6e8c4294915e522a164475428e3" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.encoder.layers.12.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.12.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.encoder.layers.12.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.encoder.layers.12.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.encoder.layers.12.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.12.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.13.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.encoder.layers.13.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.encoder.layers.13.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "33cb59bbae8c74695a79aa6b40784116" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.encoder.layers.13.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.13.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.encoder.layers.13.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.encoder.layers.13.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.encoder.layers.13.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.encoder.layers.13.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.encoder.layers.13.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.encoder.layers.13.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.encoder.layers.13.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.encoder.layers.13.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.encoder.layers.13.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.encoder.layers.13.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "ae253ee08d9cc43c05e89ea13b9c8a9c" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 26237440, "records": [ { "name": "model.encoder.layers.14.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.14.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.encoder.layers.14.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.encoder.layers.14.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.encoder.layers.14.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.encoder.layers.14.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.encoder.layers.14.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.14.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.encoder.layers.14.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.encoder.layers.14.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13120000 }, { "name": "model.encoder.layers.14.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26227200 } ], "md5sum": "867d34d19965627d4e67b1ff52328db3" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 26234880, "records": [ { "name": "model.encoder.layers.14.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.14.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.encoder.layers.14.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.encoder.layers.14.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.15.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.encoder.layers.15.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.encoder.layers.15.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.encoder.layers.15.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.encoder.layers.15.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.encoder.layers.15.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.encoder.layers.15.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.15.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.15.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 } ], "md5sum": "0722e7aed700446e1322de71c7ea627b" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.encoder.layers.15.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.15.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.encoder.layers.15.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.encoder.layers.15.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.encoder.layers.15.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.15.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.16.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.encoder.layers.16.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.encoder.layers.16.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "724cd49c6e3f0b59b06bb1e6eb79cd7a" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.encoder.layers.16.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.16.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.encoder.layers.16.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.encoder.layers.16.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.encoder.layers.16.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.encoder.layers.16.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.encoder.layers.16.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.encoder.layers.16.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.encoder.layers.16.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.encoder.layers.16.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.encoder.layers.16.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.encoder.layers.16.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "2ca0cb8a436117896bfc31539ddbb844" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 26237440, "records": [ { "name": "model.encoder.layers.17.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.17.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.encoder.layers.17.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.encoder.layers.17.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.encoder.layers.17.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.encoder.layers.17.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.encoder.layers.17.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.17.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.encoder.layers.17.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.encoder.layers.17.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13120000 }, { "name": "model.encoder.layers.17.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26227200 } ], "md5sum": "6db50501724b685960433d38b5e6c0cf" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 26234880, "records": [ { "name": "model.encoder.layers.17.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.17.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.encoder.layers.17.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.encoder.layers.17.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.18.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.encoder.layers.18.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.encoder.layers.18.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.encoder.layers.18.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.encoder.layers.18.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.encoder.layers.18.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.encoder.layers.18.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.18.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.18.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 } ], "md5sum": "6021489677857845242886ace75c5fe1" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.encoder.layers.18.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.18.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.encoder.layers.18.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.encoder.layers.18.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.encoder.layers.18.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.18.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.19.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.encoder.layers.19.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.encoder.layers.19.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "2b7214718a910a1007837722f6c060a9" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.encoder.layers.19.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.19.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.encoder.layers.19.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.encoder.layers.19.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.encoder.layers.19.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.encoder.layers.19.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.encoder.layers.19.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.encoder.layers.19.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.encoder.layers.19.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.encoder.layers.19.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.encoder.layers.19.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.encoder.layers.19.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "c13eaa07fd40073e73ef18e39752f172" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 26237440, "records": [ { "name": "model.encoder.layers.20.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.20.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.encoder.layers.20.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.encoder.layers.20.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.encoder.layers.20.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.encoder.layers.20.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.encoder.layers.20.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.20.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.encoder.layers.20.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.encoder.layers.20.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13120000 }, { "name": "model.encoder.layers.20.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26227200 } ], "md5sum": "f3b5a30a71a4037597d004c7353916cc" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 26234880, "records": [ { "name": "model.encoder.layers.20.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.20.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.encoder.layers.20.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.encoder.layers.20.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.21.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.encoder.layers.21.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.encoder.layers.21.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.encoder.layers.21.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.encoder.layers.21.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.encoder.layers.21.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.encoder.layers.21.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.21.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.21.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 } ], "md5sum": "d02d8ddcee98ddc073ab41c53dd1c3b6" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.encoder.layers.21.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.21.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.encoder.layers.21.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.encoder.layers.21.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.encoder.layers.21.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.21.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.22.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.encoder.layers.22.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.encoder.layers.22.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "3dcec1ac6e70b5e50415fcbb961e0d6d" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.encoder.layers.22.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.22.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.encoder.layers.22.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.encoder.layers.22.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.encoder.layers.22.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.encoder.layers.22.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.encoder.layers.22.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.encoder.layers.22.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.encoder.layers.22.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.encoder.layers.22.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.encoder.layers.22.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.encoder.layers.22.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "ac1de8fe0f39900c6f03e35e40848eea" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 26237440, "records": [ { "name": "model.encoder.layers.23.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.23.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.encoder.layers.23.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.encoder.layers.23.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.encoder.layers.23.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.encoder.layers.23.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.encoder.layers.23.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.23.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.encoder.layers.23.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.encoder.layers.23.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13120000 }, { "name": "model.encoder.layers.23.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26227200 } ], "md5sum": "7afbdf1c454b3e54368e26b018405455" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 26234880, "records": [ { "name": "model.encoder.layers.23.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.23.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.encoder.layers.23.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.encoder.layers.23.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.24.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.encoder.layers.24.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.encoder.layers.24.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.encoder.layers.24.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.encoder.layers.24.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.encoder.layers.24.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.encoder.layers.24.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.24.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.24.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 } ], "md5sum": "9259cf0f5ab20b94909caf938048ce59" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.encoder.layers.24.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.24.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.encoder.layers.24.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.encoder.layers.24.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.encoder.layers.24.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.24.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.25.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.encoder.layers.25.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.encoder.layers.25.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "1d96645578590d3c9775adc8f1d0d735" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.encoder.layers.25.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.25.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.encoder.layers.25.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.encoder.layers.25.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.encoder.layers.25.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.encoder.layers.25.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.encoder.layers.25.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.encoder.layers.25.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.encoder.layers.25.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.encoder.layers.25.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.encoder.layers.25.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.encoder.layers.25.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "12dcb9b5bdb67881748a240118071581" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 26237440, "records": [ { "name": "model.encoder.layers.26.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.26.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.encoder.layers.26.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.encoder.layers.26.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.encoder.layers.26.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.encoder.layers.26.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.encoder.layers.26.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.26.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.encoder.layers.26.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.encoder.layers.26.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13120000 }, { "name": "model.encoder.layers.26.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26227200 } ], "md5sum": "6b13614dbf99a27f2da365fbec0ec0af" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 26234880, "records": [ { "name": "model.encoder.layers.26.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.26.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.encoder.layers.26.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.encoder.layers.26.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.27.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.encoder.layers.27.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.encoder.layers.27.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.encoder.layers.27.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.encoder.layers.27.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.encoder.layers.27.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.encoder.layers.27.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.27.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.27.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 } ], "md5sum": "57dd253b32050c0c1c26b6cf317289ae" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.encoder.layers.27.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.27.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.encoder.layers.27.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.encoder.layers.27.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.encoder.layers.27.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.27.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.28.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.encoder.layers.28.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.encoder.layers.28.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "d1f8da7d674cdbeb5ec2039d2981091d" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.encoder.layers.28.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.28.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.encoder.layers.28.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.encoder.layers.28.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.encoder.layers.28.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.encoder.layers.28.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.encoder.layers.28.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.encoder.layers.28.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.encoder.layers.28.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.encoder.layers.28.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.encoder.layers.28.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.encoder.layers.28.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "cc0f53f30774c3a38bcfab411e9f2d94" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 26237440, "records": [ { "name": "model.encoder.layers.29.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.29.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.encoder.layers.29.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.encoder.layers.29.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.encoder.layers.29.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.encoder.layers.29.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.encoder.layers.29.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.29.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.encoder.layers.29.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.encoder.layers.29.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13120000 }, { "name": "model.encoder.layers.29.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26227200 } ], "md5sum": "0700d1a3a2f920a30ebf8edbcdcfc361" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 26234880, "records": [ { "name": "model.encoder.layers.29.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.29.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.encoder.layers.29.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.encoder.layers.29.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.encoder.layers.30.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.encoder.layers.30.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.encoder.layers.30.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.encoder.layers.30.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.encoder.layers.30.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.encoder.layers.30.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.encoder.layers.30.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.30.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.30.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 } ], "md5sum": "5302f184be04d191ee6a54eff8f6b7b7" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.encoder.layers.30.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.encoder.layers.30.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.encoder.layers.30.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.encoder.layers.30.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.encoder.layers.30.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.encoder.layers.30.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.encoder.layers.31.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.encoder.layers.31.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.encoder.layers.31.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "738beb16f37be0a45564c29ea23e05b2" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 132776960, "records": [ { "name": "model.decoder.embed_tokens.weight", "shape": [ 51866, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 132776960, "byteOffset": 0 } ], "md5sum": "a9d9ed2d43b66ca36c1860cfc3834797" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 32801280, "records": [ { "name": "model.encoder.layers.31.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.encoder.layers.31.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.encoder.layers.31.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.encoder.layers.31.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.encoder.layers.31.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.encoder.layers.31.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.encoder.layers.31.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.encoder.layers.31.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.encoder.layers.31.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.encoder.layers.31.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.encoder.layers.31.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.encoder.layers.31.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 }, { "name": "model.encoder.layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32796160 }, { "name": "model.encoder.layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32798720 } ], "md5sum": "598db2ad37c2287a61e379ae5dbe9a82" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 27386880, "records": [ { "name": "model.decoder.embed_positions.weight", "shape": [ 448, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1146880, "byteOffset": 0 }, { "name": "model.decoder.layers.0.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 1146880 }, { "name": "model.decoder.layers.0.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 4423680 }, { "name": "model.decoder.layers.0.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 7700480 }, { "name": "model.decoder.layers.0.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 7703040 }, { "name": "model.decoder.layers.0.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 10979840 }, { "name": "model.decoder.layers.0.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 10982400 }, { "name": "model.decoder.layers.0.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 14259200 }, { "name": "model.decoder.layers.0.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 14261760 }, { "name": "model.decoder.layers.0.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 14264320 }, { "name": "model.decoder.layers.0.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 14266880 }, { "name": "model.decoder.layers.0.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 17543680 }, { "name": "model.decoder.layers.0.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 20820480 }, { "name": "model.decoder.layers.0.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 20823040 }, { "name": "model.decoder.layers.0.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 24099840 }, { "name": "model.decoder.layers.0.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 24102400 }, { "name": "model.decoder.layers.0.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 27379200 }, { "name": "model.decoder.layers.0.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 27381760 }, { "name": "model.decoder.layers.0.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 27384320 } ], "md5sum": "300616b31bc4f70cd9c9b9b1d7db30af" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.decoder.layers.0.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.0.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.decoder.layers.0.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.decoder.layers.0.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.decoder.layers.0.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.0.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.1.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.decoder.layers.1.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.decoder.layers.1.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "8b33bc60291f73b24acf2ee130373226" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 32801280, "records": [ { "name": "model.decoder.layers.1.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.1.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.1.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.1.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.1.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.1.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.1.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6563840 }, { "name": "model.decoder.layers.1.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9840640 }, { "name": "model.decoder.layers.1.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.1.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.1.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 16396800 }, { "name": "model.decoder.layers.1.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16399360 }, { "name": "model.decoder.layers.1.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19676160 }, { "name": "model.decoder.layers.1.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19678720 }, { "name": "model.decoder.layers.1.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19681280 }, { "name": "model.decoder.layers.1.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19683840 }, { "name": "model.decoder.layers.1.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32791040 } ], "md5sum": "a445d6537e820d6a0df37961949da29b" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 32791040, "records": [ { "name": "model.decoder.layers.1.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.1.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.decoder.layers.1.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.decoder.layers.1.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.2.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.decoder.layers.2.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.decoder.layers.2.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.decoder.layers.2.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.decoder.layers.2.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.decoder.layers.2.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.decoder.layers.2.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.2.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.2.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.2.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26234880 }, { "name": "model.decoder.layers.2.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29511680 }, { "name": "model.decoder.layers.2.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 } ], "md5sum": "e6866029fd87980b2b62ee6ac62699dd" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.decoder.layers.2.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.2.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.2.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.2.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.2.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.2.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.2.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.decoder.layers.2.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.decoder.layers.2.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.decoder.layers.2.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.decoder.layers.2.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.decoder.layers.2.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "932712ec6747d7bc912f54bb676dcdf1" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 26240000, "records": [ { "name": "model.decoder.layers.3.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.3.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.decoder.layers.3.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.decoder.layers.3.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.decoder.layers.3.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.decoder.layers.3.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.decoder.layers.3.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.3.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.decoder.layers.3.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.3.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.3.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16396800 }, { "name": "model.decoder.layers.3.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19673600 }, { "name": "model.decoder.layers.3.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19676160 }, { "name": "model.decoder.layers.3.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22952960 }, { "name": "model.decoder.layers.3.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22955520 }, { "name": "model.decoder.layers.3.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.3.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26234880 }, { "name": "model.decoder.layers.3.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26237440 } ], "md5sum": "8b08e09cb05a2932eafb58709d22448a" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.decoder.layers.3.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.3.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.decoder.layers.3.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.decoder.layers.3.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.decoder.layers.3.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.3.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.4.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.decoder.layers.4.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.decoder.layers.4.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "89940af43031e4275d67b686878e90b4" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 32801280, "records": [ { "name": "model.decoder.layers.4.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.4.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.4.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.4.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.4.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.4.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.4.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6563840 }, { "name": "model.decoder.layers.4.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9840640 }, { "name": "model.decoder.layers.4.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.4.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.4.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 16396800 }, { "name": "model.decoder.layers.4.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16399360 }, { "name": "model.decoder.layers.4.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19676160 }, { "name": "model.decoder.layers.4.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19678720 }, { "name": "model.decoder.layers.4.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19681280 }, { "name": "model.decoder.layers.4.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19683840 }, { "name": "model.decoder.layers.4.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32791040 } ], "md5sum": "c2667de5a74c0c360a2ff2870647222a" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 32791040, "records": [ { "name": "model.decoder.layers.4.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.4.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.decoder.layers.4.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.decoder.layers.4.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.5.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.decoder.layers.5.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.decoder.layers.5.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.decoder.layers.5.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.decoder.layers.5.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.decoder.layers.5.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.decoder.layers.5.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.5.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.5.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.5.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26234880 }, { "name": "model.decoder.layers.5.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29511680 }, { "name": "model.decoder.layers.5.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 } ], "md5sum": "4f658c2dbd41f8af715d91ed42f48fde" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.decoder.layers.5.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.5.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.5.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.5.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.5.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.5.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.5.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.decoder.layers.5.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.decoder.layers.5.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.decoder.layers.5.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.decoder.layers.5.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.decoder.layers.5.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "cdc726013fcc2b0ebb19115a38cbd935" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 26240000, "records": [ { "name": "model.decoder.layers.6.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.6.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.decoder.layers.6.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.decoder.layers.6.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.decoder.layers.6.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.decoder.layers.6.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.decoder.layers.6.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.6.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.decoder.layers.6.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.6.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.6.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16396800 }, { "name": "model.decoder.layers.6.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19673600 }, { "name": "model.decoder.layers.6.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19676160 }, { "name": "model.decoder.layers.6.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22952960 }, { "name": "model.decoder.layers.6.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22955520 }, { "name": "model.decoder.layers.6.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.6.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26234880 }, { "name": "model.decoder.layers.6.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26237440 } ], "md5sum": "9483ce32ddc7e9d74aae55ca15fb0b34" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.decoder.layers.6.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.6.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.decoder.layers.6.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.decoder.layers.6.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.decoder.layers.6.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.6.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.7.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.decoder.layers.7.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.decoder.layers.7.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "6e5e7247c995317c79d8760b45227e03" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 32801280, "records": [ { "name": "model.decoder.layers.7.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.7.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.7.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.7.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.7.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.7.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.7.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6563840 }, { "name": "model.decoder.layers.7.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9840640 }, { "name": "model.decoder.layers.7.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.7.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.7.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 16396800 }, { "name": "model.decoder.layers.7.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16399360 }, { "name": "model.decoder.layers.7.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19676160 }, { "name": "model.decoder.layers.7.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19678720 }, { "name": "model.decoder.layers.7.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19681280 }, { "name": "model.decoder.layers.7.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19683840 }, { "name": "model.decoder.layers.7.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32791040 } ], "md5sum": "050c7a3370fffbfcf21ae4caac80d410" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 32791040, "records": [ { "name": "model.decoder.layers.7.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.7.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.decoder.layers.7.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.decoder.layers.7.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.8.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.decoder.layers.8.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.decoder.layers.8.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.decoder.layers.8.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.decoder.layers.8.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.decoder.layers.8.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.decoder.layers.8.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.8.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.8.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.8.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26234880 }, { "name": "model.decoder.layers.8.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29511680 }, { "name": "model.decoder.layers.8.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 } ], "md5sum": "c37f97e4fbbabfb18a27ff9f7030bf47" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.decoder.layers.8.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.8.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.8.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.8.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.8.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.8.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.8.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.decoder.layers.8.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.decoder.layers.8.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.decoder.layers.8.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.decoder.layers.8.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.decoder.layers.8.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "720514143786f0de9f8fe6a5b18054db" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 26240000, "records": [ { "name": "model.decoder.layers.9.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.9.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.decoder.layers.9.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.decoder.layers.9.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.decoder.layers.9.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.decoder.layers.9.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.decoder.layers.9.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.9.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.decoder.layers.9.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.9.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.9.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16396800 }, { "name": "model.decoder.layers.9.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19673600 }, { "name": "model.decoder.layers.9.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19676160 }, { "name": "model.decoder.layers.9.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22952960 }, { "name": "model.decoder.layers.9.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22955520 }, { "name": "model.decoder.layers.9.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.9.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26234880 }, { "name": "model.decoder.layers.9.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26237440 } ], "md5sum": "417cbbbff1a115ec20e9ebaf4c300f0c" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.decoder.layers.9.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.9.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.decoder.layers.9.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.decoder.layers.9.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.decoder.layers.9.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.9.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.10.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.decoder.layers.10.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.decoder.layers.10.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "f1f9913d678bdf56b63e466c692df469" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32801280, "records": [ { "name": "model.decoder.layers.10.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.10.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.10.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.10.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.10.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.10.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.10.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6563840 }, { "name": "model.decoder.layers.10.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9840640 }, { "name": "model.decoder.layers.10.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.10.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.10.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 16396800 }, { "name": "model.decoder.layers.10.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16399360 }, { "name": "model.decoder.layers.10.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19676160 }, { "name": "model.decoder.layers.10.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19678720 }, { "name": "model.decoder.layers.10.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19681280 }, { "name": "model.decoder.layers.10.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19683840 }, { "name": "model.decoder.layers.10.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32791040 } ], "md5sum": "a93d8a2278a731d19ed60a737db1fb47" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 32791040, "records": [ { "name": "model.decoder.layers.10.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.10.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.decoder.layers.10.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.decoder.layers.10.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.11.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.decoder.layers.11.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.decoder.layers.11.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.decoder.layers.11.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.decoder.layers.11.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.decoder.layers.11.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.decoder.layers.11.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.11.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.11.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.11.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26234880 }, { "name": "model.decoder.layers.11.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29511680 }, { "name": "model.decoder.layers.11.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 } ], "md5sum": "684334211a2d3fc285e1a2b9974fb85d" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.decoder.layers.11.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.11.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.11.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.11.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.11.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.11.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.11.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.decoder.layers.11.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.decoder.layers.11.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.decoder.layers.11.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.decoder.layers.11.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.decoder.layers.11.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "3d5f8792be5a7e338ef2fd55282ce8bc" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 26240000, "records": [ { "name": "model.decoder.layers.12.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.12.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.decoder.layers.12.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.decoder.layers.12.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.decoder.layers.12.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.decoder.layers.12.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.decoder.layers.12.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.12.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.decoder.layers.12.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.12.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.12.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16396800 }, { "name": "model.decoder.layers.12.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19673600 }, { "name": "model.decoder.layers.12.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19676160 }, { "name": "model.decoder.layers.12.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22952960 }, { "name": "model.decoder.layers.12.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22955520 }, { "name": "model.decoder.layers.12.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.12.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26234880 }, { "name": "model.decoder.layers.12.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26237440 } ], "md5sum": "8b244015460e5469f308b28033a217dc" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.decoder.layers.12.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.12.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.decoder.layers.12.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.decoder.layers.12.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.decoder.layers.12.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.12.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.13.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.decoder.layers.13.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.decoder.layers.13.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "18796ca85dfde359382f825a3c97f159" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 32801280, "records": [ { "name": "model.decoder.layers.13.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.13.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.13.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.13.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.13.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.13.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.13.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6563840 }, { "name": "model.decoder.layers.13.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9840640 }, { "name": "model.decoder.layers.13.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.13.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.13.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 16396800 }, { "name": "model.decoder.layers.13.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16399360 }, { "name": "model.decoder.layers.13.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19676160 }, { "name": "model.decoder.layers.13.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19678720 }, { "name": "model.decoder.layers.13.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19681280 }, { "name": "model.decoder.layers.13.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19683840 }, { "name": "model.decoder.layers.13.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32791040 } ], "md5sum": "0bf3a88f35cee4db5157a6776c690ed9" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 32791040, "records": [ { "name": "model.decoder.layers.13.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.13.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.decoder.layers.13.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.decoder.layers.13.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.14.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.decoder.layers.14.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.decoder.layers.14.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.decoder.layers.14.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.decoder.layers.14.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.decoder.layers.14.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.decoder.layers.14.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.14.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.14.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.14.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26234880 }, { "name": "model.decoder.layers.14.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29511680 }, { "name": "model.decoder.layers.14.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 } ], "md5sum": "3603257cac4fef732389f6ece6d7751f" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.decoder.layers.14.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.14.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.14.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.14.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.14.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.14.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.14.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.decoder.layers.14.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.decoder.layers.14.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.decoder.layers.14.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.decoder.layers.14.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.decoder.layers.14.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "e40b18ae72348340a9182cb96e856dc9" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 26240000, "records": [ { "name": "model.decoder.layers.15.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.15.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.decoder.layers.15.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.decoder.layers.15.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.decoder.layers.15.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.decoder.layers.15.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.decoder.layers.15.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.15.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.decoder.layers.15.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.15.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.15.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16396800 }, { "name": "model.decoder.layers.15.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19673600 }, { "name": "model.decoder.layers.15.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19676160 }, { "name": "model.decoder.layers.15.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22952960 }, { "name": "model.decoder.layers.15.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22955520 }, { "name": "model.decoder.layers.15.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.15.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26234880 }, { "name": "model.decoder.layers.15.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26237440 } ], "md5sum": "090d61b068beb02f91d3b31ab1e5ee0c" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.decoder.layers.15.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.15.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.decoder.layers.15.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.decoder.layers.15.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.decoder.layers.15.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.15.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.16.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.decoder.layers.16.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.decoder.layers.16.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "894008067e6fa4893e997e20e64b97b8" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 32801280, "records": [ { "name": "model.decoder.layers.16.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.16.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.16.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.16.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.16.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.16.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.16.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6563840 }, { "name": "model.decoder.layers.16.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9840640 }, { "name": "model.decoder.layers.16.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.16.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.16.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 16396800 }, { "name": "model.decoder.layers.16.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16399360 }, { "name": "model.decoder.layers.16.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19676160 }, { "name": "model.decoder.layers.16.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19678720 }, { "name": "model.decoder.layers.16.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19681280 }, { "name": "model.decoder.layers.16.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19683840 }, { "name": "model.decoder.layers.16.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32791040 } ], "md5sum": "94536372286f3094c2acfc956d7711db" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 32791040, "records": [ { "name": "model.decoder.layers.16.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.16.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.decoder.layers.16.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.decoder.layers.16.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.17.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.decoder.layers.17.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.decoder.layers.17.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.decoder.layers.17.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.decoder.layers.17.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.decoder.layers.17.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.decoder.layers.17.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.17.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.17.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.17.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26234880 }, { "name": "model.decoder.layers.17.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29511680 }, { "name": "model.decoder.layers.17.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 } ], "md5sum": "c6ebb453464b38eddc5c5fd18e4be89b" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.decoder.layers.17.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.17.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.17.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.17.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.17.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.17.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.17.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.decoder.layers.17.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.decoder.layers.17.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.decoder.layers.17.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.decoder.layers.17.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.decoder.layers.17.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "07c5b66f9812636a0d1063b88ee9834b" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 26240000, "records": [ { "name": "model.decoder.layers.18.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.18.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.decoder.layers.18.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.decoder.layers.18.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.decoder.layers.18.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.decoder.layers.18.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.decoder.layers.18.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.18.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.decoder.layers.18.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.18.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.18.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16396800 }, { "name": "model.decoder.layers.18.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19673600 }, { "name": "model.decoder.layers.18.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19676160 }, { "name": "model.decoder.layers.18.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22952960 }, { "name": "model.decoder.layers.18.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22955520 }, { "name": "model.decoder.layers.18.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.18.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26234880 }, { "name": "model.decoder.layers.18.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26237440 } ], "md5sum": "90d65223683eaa8c4fefb604f4f71b66" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.decoder.layers.18.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.18.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.decoder.layers.18.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.decoder.layers.18.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.decoder.layers.18.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.18.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.19.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.decoder.layers.19.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.decoder.layers.19.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "5a9a380357798af6ab51a7d846a68ae7" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 32801280, "records": [ { "name": "model.decoder.layers.19.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.19.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.19.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.19.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.19.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.19.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.19.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6563840 }, { "name": "model.decoder.layers.19.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9840640 }, { "name": "model.decoder.layers.19.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.19.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.19.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 16396800 }, { "name": "model.decoder.layers.19.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16399360 }, { "name": "model.decoder.layers.19.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19676160 }, { "name": "model.decoder.layers.19.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19678720 }, { "name": "model.decoder.layers.19.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19681280 }, { "name": "model.decoder.layers.19.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19683840 }, { "name": "model.decoder.layers.19.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32791040 } ], "md5sum": "c2f4f49cf03fd0bcd4f1baf7826c9627" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 32791040, "records": [ { "name": "model.decoder.layers.19.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.19.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.decoder.layers.19.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.decoder.layers.19.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.20.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.decoder.layers.20.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.decoder.layers.20.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.decoder.layers.20.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.decoder.layers.20.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.decoder.layers.20.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.decoder.layers.20.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.20.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.20.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.20.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26234880 }, { "name": "model.decoder.layers.20.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29511680 }, { "name": "model.decoder.layers.20.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 } ], "md5sum": "8b5f0c694fedd68d03e51be458d33633" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.decoder.layers.20.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.20.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.20.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.20.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.20.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.20.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.20.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.decoder.layers.20.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.decoder.layers.20.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.decoder.layers.20.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.decoder.layers.20.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.decoder.layers.20.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "dc38ea8d71a5c98efbcbf6a19c05a457" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 26240000, "records": [ { "name": "model.decoder.layers.21.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.21.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.decoder.layers.21.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.decoder.layers.21.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.decoder.layers.21.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.decoder.layers.21.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.decoder.layers.21.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.21.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.decoder.layers.21.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.21.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.21.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16396800 }, { "name": "model.decoder.layers.21.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19673600 }, { "name": "model.decoder.layers.21.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19676160 }, { "name": "model.decoder.layers.21.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22952960 }, { "name": "model.decoder.layers.21.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22955520 }, { "name": "model.decoder.layers.21.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.21.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26234880 }, { "name": "model.decoder.layers.21.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26237440 } ], "md5sum": "5eda43fb42a359d49d919785acb75bfd" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.decoder.layers.21.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.21.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.decoder.layers.21.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.decoder.layers.21.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.decoder.layers.21.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.21.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.22.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.decoder.layers.22.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.decoder.layers.22.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "f79849024eeca1777fabd0e1b6e60e7e" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 32801280, "records": [ { "name": "model.decoder.layers.22.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.22.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.22.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.22.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.22.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.22.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.22.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6563840 }, { "name": "model.decoder.layers.22.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9840640 }, { "name": "model.decoder.layers.22.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.22.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.22.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 16396800 }, { "name": "model.decoder.layers.22.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16399360 }, { "name": "model.decoder.layers.22.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19676160 }, { "name": "model.decoder.layers.22.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19678720 }, { "name": "model.decoder.layers.22.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19681280 }, { "name": "model.decoder.layers.22.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19683840 }, { "name": "model.decoder.layers.22.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32791040 } ], "md5sum": "23b391c8559a7622a02c9c70d7257547" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 32791040, "records": [ { "name": "model.decoder.layers.22.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.22.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.decoder.layers.22.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.decoder.layers.22.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.23.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.decoder.layers.23.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.decoder.layers.23.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.decoder.layers.23.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.decoder.layers.23.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.decoder.layers.23.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.decoder.layers.23.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.23.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.23.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.23.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26234880 }, { "name": "model.decoder.layers.23.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29511680 }, { "name": "model.decoder.layers.23.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 } ], "md5sum": "b5a3cea8894cf82473c12ffe483e85ac" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.decoder.layers.23.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.23.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.23.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.23.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.23.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.23.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.23.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.decoder.layers.23.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.decoder.layers.23.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.decoder.layers.23.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.decoder.layers.23.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.decoder.layers.23.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "5cd5a504ca26855575eda72df83ca8c4" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 26240000, "records": [ { "name": "model.decoder.layers.24.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.24.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.decoder.layers.24.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.decoder.layers.24.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.decoder.layers.24.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.decoder.layers.24.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.decoder.layers.24.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.24.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.decoder.layers.24.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.24.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.24.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16396800 }, { "name": "model.decoder.layers.24.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19673600 }, { "name": "model.decoder.layers.24.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19676160 }, { "name": "model.decoder.layers.24.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22952960 }, { "name": "model.decoder.layers.24.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22955520 }, { "name": "model.decoder.layers.24.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.24.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26234880 }, { "name": "model.decoder.layers.24.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26237440 } ], "md5sum": "d2e12273d8da0446d19cfa7149b50f9f" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.decoder.layers.24.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.24.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.decoder.layers.24.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.decoder.layers.24.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.decoder.layers.24.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.24.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.25.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.decoder.layers.25.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.decoder.layers.25.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "19e431783b18bc5eab944097db24e6d7" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 32801280, "records": [ { "name": "model.decoder.layers.25.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.25.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.25.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.25.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.25.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.25.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.25.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6563840 }, { "name": "model.decoder.layers.25.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9840640 }, { "name": "model.decoder.layers.25.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.25.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.25.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 16396800 }, { "name": "model.decoder.layers.25.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16399360 }, { "name": "model.decoder.layers.25.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19676160 }, { "name": "model.decoder.layers.25.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19678720 }, { "name": "model.decoder.layers.25.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19681280 }, { "name": "model.decoder.layers.25.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19683840 }, { "name": "model.decoder.layers.25.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32791040 } ], "md5sum": "a40ad24f83ffbd83d42afe03209d076c" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 32791040, "records": [ { "name": "model.decoder.layers.25.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.25.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.decoder.layers.25.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.decoder.layers.25.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.26.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.decoder.layers.26.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.decoder.layers.26.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.decoder.layers.26.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.decoder.layers.26.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.decoder.layers.26.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.decoder.layers.26.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.26.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.26.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.26.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26234880 }, { "name": "model.decoder.layers.26.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29511680 }, { "name": "model.decoder.layers.26.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 } ], "md5sum": "31f1139c03f346a2d7913d3a83b95af5" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.decoder.layers.26.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.26.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.26.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.26.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.26.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.26.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.26.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.decoder.layers.26.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.decoder.layers.26.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.decoder.layers.26.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.decoder.layers.26.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.decoder.layers.26.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "edfa3da2983adacfd350153246596d4a" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 26240000, "records": [ { "name": "model.decoder.layers.27.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.27.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.decoder.layers.27.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.decoder.layers.27.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.decoder.layers.27.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.decoder.layers.27.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.decoder.layers.27.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.27.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.decoder.layers.27.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.27.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.27.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16396800 }, { "name": "model.decoder.layers.27.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19673600 }, { "name": "model.decoder.layers.27.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19676160 }, { "name": "model.decoder.layers.27.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22952960 }, { "name": "model.decoder.layers.27.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22955520 }, { "name": "model.decoder.layers.27.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.27.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26234880 }, { "name": "model.decoder.layers.27.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26237440 } ], "md5sum": "bde65c886566c0836d171f2cd0011bf0" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.decoder.layers.27.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.27.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.decoder.layers.27.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.decoder.layers.27.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.decoder.layers.27.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.27.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.28.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.decoder.layers.28.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.decoder.layers.28.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "bcb7619fc22e4df1bc6dc2738545f346" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 32801280, "records": [ { "name": "model.decoder.layers.28.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.28.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.28.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.28.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.28.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.28.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.28.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6563840 }, { "name": "model.decoder.layers.28.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9840640 }, { "name": "model.decoder.layers.28.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.28.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.28.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 16396800 }, { "name": "model.decoder.layers.28.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16399360 }, { "name": "model.decoder.layers.28.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19676160 }, { "name": "model.decoder.layers.28.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19678720 }, { "name": "model.decoder.layers.28.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19681280 }, { "name": "model.decoder.layers.28.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19683840 }, { "name": "model.decoder.layers.28.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32791040 } ], "md5sum": "71066aeef89858b3f85b0e4315378643" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 32791040, "records": [ { "name": "model.decoder.layers.28.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.28.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.decoder.layers.28.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.decoder.layers.28.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.29.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13114880 }, { "name": "model.decoder.layers.29.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16391680 }, { "name": "model.decoder.layers.29.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19668480 }, { "name": "model.decoder.layers.29.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19671040 }, { "name": "model.decoder.layers.29.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22947840 }, { "name": "model.decoder.layers.29.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22950400 }, { "name": "model.decoder.layers.29.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.29.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.29.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.29.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26234880 }, { "name": "model.decoder.layers.29.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29511680 }, { "name": "model.decoder.layers.29.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 } ], "md5sum": "660f4f6aefbc39bd977c43c834e229d9" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 32796160, "records": [ { "name": "model.decoder.layers.29.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.29.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.29.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.29.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.29.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.29.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.29.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 6563840 }, { "name": "model.decoder.layers.29.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 19671040 }, { "name": "model.decoder.layers.29.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19681280 }, { "name": "model.decoder.layers.29.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32788480 }, { "name": "model.decoder.layers.29.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32791040 }, { "name": "model.decoder.layers.29.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32793600 } ], "md5sum": "8f789001dd5fbd4db91b0393845af257" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 26240000, "records": [ { "name": "model.decoder.layers.30.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.30.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3276800 }, { "name": "model.decoder.layers.30.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6553600 }, { "name": "model.decoder.layers.30.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6556160 }, { "name": "model.decoder.layers.30.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 9832960 }, { "name": "model.decoder.layers.30.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9835520 }, { "name": "model.decoder.layers.30.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layers.30.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.decoder.layers.30.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.30.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.30.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16396800 }, { "name": "model.decoder.layers.30.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19673600 }, { "name": "model.decoder.layers.30.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 19676160 }, { "name": "model.decoder.layers.30.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 22952960 }, { "name": "model.decoder.layers.30.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 22955520 }, { "name": "model.decoder.layers.30.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26232320 }, { "name": "model.decoder.layers.30.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26234880 }, { "name": "model.decoder.layers.30.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26237440 } ], "md5sum": "e4e3e18a4cb4660fe6f09800e76c9e25" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 32788480, "records": [ { "name": "model.decoder.layers.30.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.30.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13107200 }, { "name": "model.decoder.layers.30.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13117440 }, { "name": "model.decoder.layers.30.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26224640 }, { "name": "model.decoder.layers.30.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26227200 }, { "name": "model.decoder.layers.30.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 26229760 }, { "name": "model.decoder.layers.31.self_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 26232320 }, { "name": "model.decoder.layers.31.self_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 29509120 }, { "name": "model.decoder.layers.31.self_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 32785920 } ], "md5sum": "05c80eef162c6e765ce279754a9408c6" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 32801280, "records": [ { "name": "model.decoder.layers.31.self_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 0 }, { "name": "model.decoder.layers.31.self_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 3276800 }, { "name": "model.decoder.layers.31.self_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 3279360 }, { "name": "model.decoder.layers.31.self_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6556160 }, { "name": "model.decoder.layers.31.self_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6558720 }, { "name": "model.decoder.layers.31.self_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 6561280 }, { "name": "model.decoder.layers.31.encoder_attn.k_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 6563840 }, { "name": "model.decoder.layers.31.encoder_attn.v_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9840640 }, { "name": "model.decoder.layers.31.encoder_attn.v_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 }, { "name": "model.decoder.layers.31.encoder_attn.q_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 13120000 }, { "name": "model.decoder.layers.31.encoder_attn.q_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 16396800 }, { "name": "model.decoder.layers.31.encoder_attn.out_proj.weight", "shape": [ 1280, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 16399360 }, { "name": "model.decoder.layers.31.encoder_attn.out_proj.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19676160 }, { "name": "model.decoder.layers.31.encoder_attn_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19678720 }, { "name": "model.decoder.layers.31.encoder_attn_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 19681280 }, { "name": "model.decoder.layers.31.fc1.weight", "shape": [ 5120, 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 19683840 }, { "name": "model.decoder.layers.31.fc1.bias", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32791040 } ], "md5sum": "7c8c2b171f6edf594198119fc930f2e1" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 13120000, "records": [ { "name": "model.decoder.layers.31.fc2.weight", "shape": [ 1280, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.decoder.layers.31.fc2.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13107200 }, { "name": "model.decoder.layers.31.final_layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13109760 }, { "name": "model.decoder.layers.31.final_layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13112320 }, { "name": "model.decoder.layer_norm.weight", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13114880 }, { "name": "model.decoder.layer_norm.bias", "shape": [ 1280 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2560, "byteOffset": 13117440 } ], "md5sum": "88e9fb3b0e4823855fd74c5e6b5823c9" } ] }