Add token-level timesteps to distil-large-v3-turbo

Browse files

Files changed (10) hide show

distil-whisper_distil-large-v3_turbo/TextDecoder.mlmodelc/analytics/coremldata.bin +1 -1
distil-whisper_distil-large-v3_turbo/TextDecoder.mlmodelc/coremldata.bin +2 -2
distil-whisper_distil-large-v3_turbo/TextDecoder.mlmodelc/metadata.json +13 -2
distil-whisper_distil-large-v3_turbo/TextDecoder.mlmodelc/model.mil +242 -56
distil-whisper_distil-large-v3_turbo/TextDecoder.mlmodelc/weights/weight.bin +1 -1
distil-whisper_distil-large-v3_turbo_600MB/TextDecoder.mlmodelc/analytics/coremldata.bin +1 -1
distil-whisper_distil-large-v3_turbo_600MB/TextDecoder.mlmodelc/coremldata.bin +2 -2
distil-whisper_distil-large-v3_turbo_600MB/TextDecoder.mlmodelc/metadata.json +13 -2
distil-whisper_distil-large-v3_turbo_600MB/TextDecoder.mlmodelc/model.mil +242 -56
distil-whisper_distil-large-v3_turbo_600MB/TextDecoder.mlmodelc/weights/weight.bin +1 -1

distil-whisper_distil-large-v3_turbo/TextDecoder.mlmodelc/analytics/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:394520c761dbfb69b05c7c2d49380bbece53e92f6bd19ffabcd46f6aaa2193ad
 size 243

 version https://git-lfs.github.com/spec/v1
+oid sha256:013f68d396d51f281bef4db5e0c3b8eb8df147f657830c8df34589875848663d
 size 243

distil-whisper_distil-large-v3_turbo/TextDecoder.mlmodelc/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9b45ba978c8fcc372a7b52150f83b3ddbe3376ec044e3439e9e227d0076950f
-size 593

 version https://git-lfs.github.com/spec/v1
+oid sha256:db10bd19b00bccc9b7d2a5a8317b6d73be16c01f78b3b3da47040ae604347b11
+size 633

distil-whisper_distil-large-v3_turbo/TextDecoder.mlmodelc/metadata.json CHANGED Viewed

@@ -32,6 +32,16 @@
         "shape" : "[1, 2560, 1, 1]",
         "name" : "value_cache_updates",
         "type" : "MultiArray"
       }
     ],
     "modelParameters" : [
@@ -40,10 +50,11 @@
     "specificationVersion" : 7,
     "mlProgramOperationTypeHistogram" : {
       "Split" : 2,
-      "Concat" : 2,
       "Ios16.rsqrt" : 7,
       "Ios16.mul" : 26,
       "Squeeze" : 1,
       "Ios16.sub" : 8,
       "Transpose" : 1,
       "Ios16.conv" : 20,
@@ -51,7 +62,7 @@
       "Ios16.linear" : 1,
       "Ios16.matmul" : 8,
       "Ios16.gelu" : 2,
-      "Ios16.reduceMean" : 14,
       "ExpandDims" : 6,
       "Ios16.batchNorm" : 7,
       "Ios16.gather" : 2,

         "shape" : "[1, 2560, 1, 1]",
         "name" : "value_cache_updates",
         "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1500)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500]",
+        "name" : "alignment_heads_weights",
+        "type" : "MultiArray"
       }
     ],
     "modelParameters" : [
     "specificationVersion" : 7,
     "mlProgramOperationTypeHistogram" : {
       "Split" : 2,
+      "Concat" : 3,
       "Ios16.rsqrt" : 7,
       "Ios16.mul" : 26,
       "Squeeze" : 1,
+      "SliceByIndex" : 40,
       "Ios16.sub" : 8,
       "Transpose" : 1,
       "Ios16.conv" : 20,
       "Ios16.linear" : 1,
       "Ios16.matmul" : 8,
       "Ios16.gelu" : 2,
+      "Ios16.reduceMean" : 15,
       "ExpandDims" : 6,
       "Ios16.batchNorm" : 7,
       "Ios16.gather" : 2,

distil-whisper_distil-large-v3_turbo/TextDecoder.mlmodelc/model.mil CHANGED Viewed

@@ -147,12 +147,12 @@ program(1.0)
             tensor<bool, []> mh_w_5_transpose_x_0 = const()[name = tensor<string, []>("mh_w_5_transpose_x_0"), val = tensor<bool, []>(true)];
             tensor<bool, []> mh_w_5_transpose_y_0 = const()[name = tensor<string, []>("mh_w_5_transpose_y_0"), val = tensor<bool, []>(false)];
             tensor<fp16, [1, 20, 1, 1500]> mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_207_cast_fp16, y = var_209_cast_fp16)[name = tensor<string, []>("mh_w_5_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 1500]> var_212_cast_fp16 = softmax(axis = var_56, x = mh_w_5_cast_fp16)[name = tensor<string, []>("op_212_cast_fp16")];
             tensor<int32, [4]> var_213 = const()[name = tensor<string, []>("op_213"), val = tensor<int32, [4]>([1, 20, 64, -1])];
             tensor<fp16, [1, 20, 64, 1500]> var_214_cast_fp16 = reshape(shape = var_213, x = value_3_cast_fp16)[name = tensor<string, []>("op_214_cast_fp16")];
             tensor<bool, []> attn_3_transpose_x_0 = const()[name = tensor<string, []>("attn_3_transpose_x_0"), val = tensor<bool, []>(false)];
             tensor<bool, []> attn_3_transpose_y_0 = const()[name = tensor<string, []>("attn_3_transpose_y_0"), val = tensor<bool, []>(true)];
-            tensor<fp16, [1, 20, 64, 1]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_214_cast_fp16, y = var_212_cast_fp16)[name = tensor<string, []>("attn_3_cast_fp16")];
             tensor<int32, [4]> var_217 = const()[name = tensor<string, []>("op_217"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
             tensor<fp16, [1, 1280, 1, 1]> input_3_cast_fp16 = reshape(shape = var_217, x = attn_3_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
             tensor<int32, [2]> var_221 = const()[name = tensor<string, []>("op_221"), val = tensor<int32, [2]>([1, 1])];
@@ -209,30 +209,30 @@ program(1.0)
             tensor<fp16, []> denom_7_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_7_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
             tensor<fp16, [1, 1, 1, 1]> denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0_to_fp16, x = var_297_cast_fp16)[name = tensor<string, []>("denom_7_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> out_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = denom_7_cast_fp16)[name = tensor<string, []>("out_7_cast_fp16")];
-            tensor<fp16, [1280]> obj_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_13_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186403136)))];
-            tensor<fp16, [1280]> obj_13_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_13_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186405760)))];
-            tensor<fp16, []> obj_13_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_13_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor<string, []>("obj_13_cast_fp16")];
             tensor<int32, [2]> var_312 = const()[name = tensor<string, []>("op_312"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_314 = const()[name = tensor<string, []>("op_314"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> query_5_pad_type_0 = const()[name = tensor<string, []>("query_5_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> query_5_pad_0 = const()[name = tensor<string, []>("query_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186408384)))];
             tensor<fp16, [1280]> layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(189685248)))];
-            tensor<fp16, [1, 1280, 1, 1]> query_5_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = var_314, groups = var_277, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = var_312, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("query_5_cast_fp16")];
             tensor<int32, [2]> var_318 = const()[name = tensor<string, []>("op_318"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_320 = const()[name = tensor<string, []>("op_320"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> current_key_pad_type_0 = const()[name = tensor<string, []>("current_key_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> current_key_pad_0 = const()[name = tensor<string, []>("current_key_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(189687872)))];
-            tensor<fp16, [1, 1280, 1, 1]> current_key_cast_fp16 = conv(dilations = var_320, groups = var_277, pad = current_key_pad_0, pad_type = current_key_pad_type_0, strides = var_318, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("current_key_cast_fp16")];
             tensor<int32, [2]> var_325 = const()[name = tensor<string, []>("op_325"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_327 = const()[name = tensor<string, []>("op_327"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> current_value_pad_type_0 = const()[name = tensor<string, []>("current_value_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> current_value_pad_0 = const()[name = tensor<string, []>("current_value_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(192964736)))];
             tensor<fp16, [1280]> layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(196241600)))];
-            tensor<fp16, [1, 1280, 1, 1]> current_value_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = var_327, groups = var_277, pad = current_value_pad_0, pad_type = current_value_pad_type_0, strides = var_325, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("current_value_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 224]> var_334_cast_fp16 = mul(x = current_key_cast_fp16, y = var_118_cast_fp16)[name = tensor<string, []>("op_334_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 224]> var_336_cast_fp16 = mul(x = var_43_cast_fp16_1, y = var_121_cast_fp16)[name = tensor<string, []>("op_336_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 224]> key_5_cast_fp16 = add(x = var_334_cast_fp16, y = var_336_cast_fp16)[name = tensor<string, []>("key_5_cast_fp16")];
@@ -259,12 +259,12 @@ program(1.0)
             tensor<fp16, [1, 1280, 1, 1]> input_11_cast_fp16 = reshape(shape = var_361, x = attn_5_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
             tensor<int32, [2]> var_365 = const()[name = tensor<string, []>("op_365"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_367 = const()[name = tensor<string, []>("op_367"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> obj_19_pad_type_0 = const()[name = tensor<string, []>("obj_19_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> obj_19_pad_0 = const()[name = tensor<string, []>("obj_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(196244224)))];
             tensor<fp16, [1280]> layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199521088)))];
-            tensor<fp16, [1, 1280, 1, 1]> obj_19_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = var_367, groups = var_277, pad = obj_19_pad_0, pad_type = obj_19_pad_type_0, strides = var_365, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("obj_19_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_19_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
             tensor<int32, [1]> var_377 = const()[name = tensor<string, []>("op_377"), val = tensor<int32, [1]>([1])];
             tensor<fp16, [1, 1, 1, 1]> channels_mean_9_cast_fp16 = reduce_mean(axes = var_377, keep_dims = var_278, x = inputs_9_cast_fp16)[name = tensor<string, []>("channels_mean_9_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_9_cast_fp16 = sub(x = inputs_9_cast_fp16, y = channels_mean_9_cast_fp16)[name = tensor<string, []>("zero_mean_9_cast_fp16")];
@@ -276,17 +276,17 @@ program(1.0)
             tensor<fp16, []> denom_9_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_9_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
             tensor<fp16, [1, 1, 1, 1]> denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0_to_fp16, x = var_384_cast_fp16)[name = tensor<string, []>("denom_9_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> out_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = denom_9_cast_fp16)[name = tensor<string, []>("out_9_cast_fp16")];
-            tensor<fp16, [1280]> obj_21_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_21_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199523712)))];
-            tensor<fp16, [1280]> obj_21_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_21_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199526336)))];
-            tensor<fp16, []> obj_21_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_21_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor<string, []>("obj_21_cast_fp16")];
             tensor<int32, [2]> var_399 = const()[name = tensor<string, []>("op_399"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_401 = const()[name = tensor<string, []>("op_401"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> query_pad_type_0 = const()[name = tensor<string, []>("query_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> query_pad_0 = const()[name = tensor<string, []>("query_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199528960)))];
             tensor<fp16, [1280]> layers_1_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202805824)))];
-            tensor<fp16, [1, 1280, 1, 1]> query_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_bias_to_fp16, dilations = var_401, groups = var_277, pad = query_pad_0, pad_type = query_pad_type_0, strides = var_399, weight = layers_1_encoder_attn_q_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor<string, []>("query_cast_fp16")];
             tensor<int32, [2]> var_405 = const()[name = tensor<string, []>("op_405"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_407 = const()[name = tensor<string, []>("op_407"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> key_pad_type_0 = const()[name = tensor<string, []>("key_pad_type_0"), val = tensor<string, []>("custom")];
@@ -309,81 +309,267 @@ program(1.0)
             tensor<bool, []> mh_w_transpose_x_0 = const()[name = tensor<string, []>("mh_w_transpose_x_0"), val = tensor<bool, []>(true)];
             tensor<bool, []> mh_w_transpose_y_0 = const()[name = tensor<string, []>("mh_w_transpose_y_0"), val = tensor<bool, []>(false)];
             tensor<fp16, [1, 20, 1, 1500]> mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_421_cast_fp16, y = var_423_cast_fp16)[name = tensor<string, []>("mh_w_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 1500]> var_426_cast_fp16 = softmax(axis = var_270, x = mh_w_cast_fp16)[name = tensor<string, []>("op_426_cast_fp16")];
             tensor<int32, [4]> var_427 = const()[name = tensor<string, []>("op_427"), val = tensor<int32, [4]>([1, 20, 64, -1])];
             tensor<fp16, [1, 20, 64, 1500]> var_428_cast_fp16 = reshape(shape = var_427, x = value_cast_fp16)[name = tensor<string, []>("op_428_cast_fp16")];
             tensor<bool, []> attn_transpose_x_0 = const()[name = tensor<string, []>("attn_transpose_x_0"), val = tensor<bool, []>(false)];
             tensor<bool, []> attn_transpose_y_0 = const()[name = tensor<string, []>("attn_transpose_y_0"), val = tensor<bool, []>(true)];
-            tensor<fp16, [1, 20, 64, 1]> attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_428_cast_fp16, y = var_426_cast_fp16)[name = tensor<string, []>("attn_cast_fp16")];
             tensor<int32, [4]> var_431 = const()[name = tensor<string, []>("op_431"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
             tensor<fp16, [1, 1280, 1, 1]> input_13_cast_fp16 = reshape(shape = var_431, x = attn_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
             tensor<int32, [2]> var_435 = const()[name = tensor<string, []>("op_435"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_437 = const()[name = tensor<string, []>("op_437"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> obj_23_pad_type_0 = const()[name = tensor<string, []>("obj_23_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> obj_23_pad_0 = const()[name = tensor<string, []>("obj_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(209364800)))];
             tensor<fp16, [1280]> layers_1_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212641664)))];
-            tensor<fp16, [1, 1280, 1, 1]> obj_23_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_bias_to_fp16, dilations = var_437, groups = var_277, pad = obj_23_pad_0, pad_type = obj_23_pad_type_0, strides = var_435, weight = layers_1_encoder_attn_o_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("obj_23_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_23_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
-            tensor<int32, [1]> var_443 = const()[name = tensor<string, []>("op_443"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 1]> channels_mean_11_cast_fp16 = reduce_mean(axes = var_443, keep_dims = var_278, x = inputs_11_cast_fp16)[name = tensor<string, []>("channels_mean_11_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_11_cast_fp16 = sub(x = inputs_11_cast_fp16, y = channels_mean_11_cast_fp16)[name = tensor<string, []>("zero_mean_11_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_sq_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = zero_mean_11_cast_fp16)[name = tensor<string, []>("zero_mean_sq_11_cast_fp16")];
-            tensor<int32, [1]> var_447 = const()[name = tensor<string, []>("op_447"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 1]> var_448_cast_fp16 = reduce_mean(axes = var_447, keep_dims = var_278, x = zero_mean_sq_11_cast_fp16)[name = tensor<string, []>("op_448_cast_fp16")];
-            tensor<fp16, []> var_449_to_fp16 = const()[name = tensor<string, []>("op_449_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 1, 1, 1]> var_450_cast_fp16 = add(x = var_448_cast_fp16, y = var_449_to_fp16)[name = tensor<string, []>("op_450_cast_fp16")];
             tensor<fp16, []> denom_11_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_11_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 1]> denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0_to_fp16, x = var_450_cast_fp16)[name = tensor<string, []>("denom_11_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> out_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = denom_11_cast_fp16)[name = tensor<string, []>("out_11_cast_fp16")];
             tensor<fp16, [1280]> input_15_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_15_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212644288)))];
             tensor<fp16, [1280]> input_15_beta_0_to_fp16 = const()[name = tensor<string, []>("input_15_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212646912)))];
             tensor<fp16, []> input_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
             tensor<fp16, [1, 1280, 1, 1]> input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
-            tensor<int32, [2]> var_461 = const()[name = tensor<string, []>("op_461"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_463 = const()[name = tensor<string, []>("op_463"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> input_17_pad_type_0 = const()[name = tensor<string, []>("input_17_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> input_17_pad_0 = const()[name = tensor<string, []>("input_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [5120, 1280, 1, 1]> layers_1_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212649536)))];
             tensor<fp16, [5120]> layers_1_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(225756800)))];
-            tensor<fp16, [1, 5120, 1, 1]> input_17_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = var_463, groups = var_277, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_461, weight = layers_1_fc1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
             tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
             tensor<fp16, [1, 5120, 1, 1]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_17_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
-            tensor<int32, [2]> var_469 = const()[name = tensor<string, []>("op_469"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_471 = const()[name = tensor<string, []>("op_471"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> hidden_states_5_pad_type_0 = const()[name = tensor<string, []>("hidden_states_5_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> hidden_states_5_pad_0 = const()[name = tensor<string, []>("hidden_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 5120, 1, 1]> layers_1_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(225767104)))];
             tensor<fp16, [1280]> layers_1_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238874368)))];
-            tensor<fp16, [1, 1280, 1, 1]> hidden_states_5_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = var_471, groups = var_277, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = var_469, weight = layers_1_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("hidden_states_5_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> inputs_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
-            tensor<bool, []> var_481 = const()[name = tensor<string, []>("op_481"), val = tensor<bool, []>(true)];
-            tensor<int32, [1]> var_485 = const()[name = tensor<string, []>("op_485"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 1]> channels_mean_cast_fp16 = reduce_mean(axes = var_485, keep_dims = var_481, x = inputs_cast_fp16)[name = tensor<string, []>("channels_mean_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_cast_fp16 = sub(x = inputs_cast_fp16, y = channels_mean_cast_fp16)[name = tensor<string, []>("zero_mean_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_sq_cast_fp16 = mul(x = zero_mean_cast_fp16, y = zero_mean_cast_fp16)[name = tensor<string, []>("zero_mean_sq_cast_fp16")];
-            tensor<int32, [1]> var_489 = const()[name = tensor<string, []>("op_489"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 1]> var_490_cast_fp16 = reduce_mean(axes = var_489, keep_dims = var_481, x = zero_mean_sq_cast_fp16)[name = tensor<string, []>("op_490_cast_fp16")];
-            tensor<fp16, []> var_491_to_fp16 = const()[name = tensor<string, []>("op_491_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 1, 1, 1]> var_492_cast_fp16 = add(x = var_490_cast_fp16, y = var_491_to_fp16)[name = tensor<string, []>("op_492_cast_fp16")];
             tensor<fp16, []> denom_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 1]> denom_cast_fp16 = rsqrt(epsilon = denom_epsilon_0_to_fp16, x = var_492_cast_fp16)[name = tensor<string, []>("denom_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> out_cast_fp16 = mul(x = zero_mean_cast_fp16, y = denom_cast_fp16)[name = tensor<string, []>("out_cast_fp16")];
             tensor<fp16, [1280]> hidden_states_gamma_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238876992)))];
             tensor<fp16, [1280]> hidden_states_beta_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238879616)))];
             tensor<fp16, []> hidden_states_epsilon_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
             tensor<fp16, [1, 1280, 1, 1]> hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor<string, []>("hidden_states_cast_fp16")];
-            tensor<int32, [1]> var_502_axes_0 = const()[name = tensor<string, []>("op_502_axes_0"), val = tensor<int32, [1]>([2])];
-            tensor<fp16, [1, 1280, 1]> var_502_cast_fp16 = squeeze(axes = var_502_axes_0, x = hidden_states_cast_fp16)[name = tensor<string, []>("op_502_cast_fp16")];
-            tensor<int32, [3]> var_505_perm_0 = const()[name = tensor<string, []>("op_505_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
             tensor<fp16, [51866]> linear_0_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_0_bias_0_to_fp16"), val = tensor<fp16, [51866]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238882240)))];
-            tensor<fp16, [1, 1, 1280]> transpose_0 = transpose(perm = var_505_perm_0, x = var_502_cast_fp16)[name = tensor<string, []>("transpose_0")];
             tensor<fp16, [1, 1, 51866]> logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = transpose_0)[name = tensor<string, []>("linear_0_cast_fp16")];
-            tensor<int32, []> var_509 = const()[name = tensor<string, []>("op_509"), val = tensor<int32, []>(1)];
-            tensor<bool, []> obj_27_interleave_0 = const()[name = tensor<string, []>("obj_27_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 2560, 1, 1]> key_cache_updates = concat(axis = var_509, interleave = obj_27_interleave_0, values = (current_key_1_cast_fp16, current_key_cast_fp16))[name = tensor<string, []>("obj_27_cast_fp16")];
-            tensor<int32, []> var_512 = const()[name = tensor<string, []>("op_512"), val = tensor<int32, []>(1)];
-            tensor<bool, []> obj_interleave_0 = const()[name = tensor<string, []>("obj_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 2560, 1, 1]> value_cache_updates = concat(axis = var_512, interleave = obj_interleave_0, values = (current_value_1_cast_fp16, current_value_cast_fp16))[name = tensor<string, []>("obj_cast_fp16")];
-        } -> (logits, key_cache_updates, value_cache_updates);
 }

             tensor<bool, []> mh_w_5_transpose_x_0 = const()[name = tensor<string, []>("mh_w_5_transpose_x_0"), val = tensor<bool, []>(true)];
             tensor<bool, []> mh_w_5_transpose_y_0 = const()[name = tensor<string, []>("mh_w_5_transpose_y_0"), val = tensor<bool, []>(false)];
             tensor<fp16, [1, 20, 1, 1500]> mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_207_cast_fp16, y = var_209_cast_fp16)[name = tensor<string, []>("mh_w_5_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 1500]> obj_13_cast_fp16 = softmax(axis = var_56, x = mh_w_5_cast_fp16)[name = tensor<string, []>("obj_13_cast_fp16")];
             tensor<int32, [4]> var_213 = const()[name = tensor<string, []>("op_213"), val = tensor<int32, [4]>([1, 20, 64, -1])];
             tensor<fp16, [1, 20, 64, 1500]> var_214_cast_fp16 = reshape(shape = var_213, x = value_3_cast_fp16)[name = tensor<string, []>("op_214_cast_fp16")];
             tensor<bool, []> attn_3_transpose_x_0 = const()[name = tensor<string, []>("attn_3_transpose_x_0"), val = tensor<bool, []>(false)];
             tensor<bool, []> attn_3_transpose_y_0 = const()[name = tensor<string, []>("attn_3_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 20, 64, 1]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_214_cast_fp16, y = obj_13_cast_fp16)[name = tensor<string, []>("attn_3_cast_fp16")];
             tensor<int32, [4]> var_217 = const()[name = tensor<string, []>("op_217"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
             tensor<fp16, [1, 1280, 1, 1]> input_3_cast_fp16 = reshape(shape = var_217, x = attn_3_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
             tensor<int32, [2]> var_221 = const()[name = tensor<string, []>("op_221"), val = tensor<int32, [2]>([1, 1])];
             tensor<fp16, []> denom_7_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_7_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
             tensor<fp16, [1, 1, 1, 1]> denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0_to_fp16, x = var_297_cast_fp16)[name = tensor<string, []>("denom_7_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> out_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = denom_7_cast_fp16)[name = tensor<string, []>("out_7_cast_fp16")];
+            tensor<fp16, [1280]> obj_15_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_15_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186403136)))];
+            tensor<fp16, [1280]> obj_15_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_15_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186405760)))];
+            tensor<fp16, []> obj_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> obj_15_cast_fp16 = batch_norm(beta = obj_15_beta_0_to_fp16, epsilon = obj_15_epsilon_0_to_fp16, gamma = obj_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor<string, []>("obj_15_cast_fp16")];
             tensor<int32, [2]> var_312 = const()[name = tensor<string, []>("op_312"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_314 = const()[name = tensor<string, []>("op_314"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> query_5_pad_type_0 = const()[name = tensor<string, []>("query_5_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> query_5_pad_0 = const()[name = tensor<string, []>("query_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186408384)))];
             tensor<fp16, [1280]> layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(189685248)))];
+            tensor<fp16, [1, 1280, 1, 1]> query_5_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = var_314, groups = var_277, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = var_312, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor<string, []>("query_5_cast_fp16")];
             tensor<int32, [2]> var_318 = const()[name = tensor<string, []>("op_318"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_320 = const()[name = tensor<string, []>("op_320"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> current_key_pad_type_0 = const()[name = tensor<string, []>("current_key_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> current_key_pad_0 = const()[name = tensor<string, []>("current_key_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(189687872)))];
+            tensor<fp16, [1, 1280, 1, 1]> current_key_cast_fp16 = conv(dilations = var_320, groups = var_277, pad = current_key_pad_0, pad_type = current_key_pad_type_0, strides = var_318, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor<string, []>("current_key_cast_fp16")];
             tensor<int32, [2]> var_325 = const()[name = tensor<string, []>("op_325"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_327 = const()[name = tensor<string, []>("op_327"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> current_value_pad_type_0 = const()[name = tensor<string, []>("current_value_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> current_value_pad_0 = const()[name = tensor<string, []>("current_value_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(192964736)))];
             tensor<fp16, [1280]> layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(196241600)))];
+            tensor<fp16, [1, 1280, 1, 1]> current_value_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = var_327, groups = var_277, pad = current_value_pad_0, pad_type = current_value_pad_type_0, strides = var_325, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor<string, []>("current_value_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 224]> var_334_cast_fp16 = mul(x = current_key_cast_fp16, y = var_118_cast_fp16)[name = tensor<string, []>("op_334_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 224]> var_336_cast_fp16 = mul(x = var_43_cast_fp16_1, y = var_121_cast_fp16)[name = tensor<string, []>("op_336_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 224]> key_5_cast_fp16 = add(x = var_334_cast_fp16, y = var_336_cast_fp16)[name = tensor<string, []>("key_5_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> input_11_cast_fp16 = reshape(shape = var_361, x = attn_5_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
             tensor<int32, [2]> var_365 = const()[name = tensor<string, []>("op_365"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_367 = const()[name = tensor<string, []>("op_367"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_21_pad_type_0 = const()[name = tensor<string, []>("obj_21_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_21_pad_0 = const()[name = tensor<string, []>("obj_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(196244224)))];
             tensor<fp16, [1280]> layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199521088)))];
+            tensor<fp16, [1, 1280, 1, 1]> obj_21_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = var_367, groups = var_277, pad = obj_21_pad_0, pad_type = obj_21_pad_type_0, strides = var_365, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("obj_21_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_21_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
             tensor<int32, [1]> var_377 = const()[name = tensor<string, []>("op_377"), val = tensor<int32, [1]>([1])];
             tensor<fp16, [1, 1, 1, 1]> channels_mean_9_cast_fp16 = reduce_mean(axes = var_377, keep_dims = var_278, x = inputs_9_cast_fp16)[name = tensor<string, []>("channels_mean_9_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_9_cast_fp16 = sub(x = inputs_9_cast_fp16, y = channels_mean_9_cast_fp16)[name = tensor<string, []>("zero_mean_9_cast_fp16")];
             tensor<fp16, []> denom_9_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_9_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
             tensor<fp16, [1, 1, 1, 1]> denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0_to_fp16, x = var_384_cast_fp16)[name = tensor<string, []>("denom_9_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> out_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = denom_9_cast_fp16)[name = tensor<string, []>("out_9_cast_fp16")];
+            tensor<fp16, [1280]> obj_23_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_23_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199523712)))];
+            tensor<fp16, [1280]> obj_23_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_23_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199526336)))];
+            tensor<fp16, []> obj_23_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_23_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> obj_23_cast_fp16 = batch_norm(beta = obj_23_beta_0_to_fp16, epsilon = obj_23_epsilon_0_to_fp16, gamma = obj_23_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor<string, []>("obj_23_cast_fp16")];
             tensor<int32, [2]> var_399 = const()[name = tensor<string, []>("op_399"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_401 = const()[name = tensor<string, []>("op_401"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> query_pad_type_0 = const()[name = tensor<string, []>("query_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> query_pad_0 = const()[name = tensor<string, []>("query_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199528960)))];
             tensor<fp16, [1280]> layers_1_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202805824)))];
+            tensor<fp16, [1, 1280, 1, 1]> query_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_bias_to_fp16, dilations = var_401, groups = var_277, pad = query_pad_0, pad_type = query_pad_type_0, strides = var_399, weight = layers_1_encoder_attn_q_proj_weight_to_fp16, x = obj_23_cast_fp16)[name = tensor<string, []>("query_cast_fp16")];
             tensor<int32, [2]> var_405 = const()[name = tensor<string, []>("op_405"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_407 = const()[name = tensor<string, []>("op_407"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> key_pad_type_0 = const()[name = tensor<string, []>("key_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<bool, []> mh_w_transpose_x_0 = const()[name = tensor<string, []>("mh_w_transpose_x_0"), val = tensor<bool, []>(true)];
             tensor<bool, []> mh_w_transpose_y_0 = const()[name = tensor<string, []>("mh_w_transpose_y_0"), val = tensor<bool, []>(false)];
             tensor<fp16, [1, 20, 1, 1500]> mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_421_cast_fp16, y = var_423_cast_fp16)[name = tensor<string, []>("mh_w_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 1500]> obj_27_cast_fp16 = softmax(axis = var_270, x = mh_w_cast_fp16)[name = tensor<string, []>("obj_27_cast_fp16")];
             tensor<int32, [4]> var_427 = const()[name = tensor<string, []>("op_427"), val = tensor<int32, [4]>([1, 20, 64, -1])];
             tensor<fp16, [1, 20, 64, 1500]> var_428_cast_fp16 = reshape(shape = var_427, x = value_cast_fp16)[name = tensor<string, []>("op_428_cast_fp16")];
             tensor<bool, []> attn_transpose_x_0 = const()[name = tensor<string, []>("attn_transpose_x_0"), val = tensor<bool, []>(false)];
             tensor<bool, []> attn_transpose_y_0 = const()[name = tensor<string, []>("attn_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 20, 64, 1]> attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_428_cast_fp16, y = obj_27_cast_fp16)[name = tensor<string, []>("attn_cast_fp16")];
             tensor<int32, [4]> var_431 = const()[name = tensor<string, []>("op_431"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
             tensor<fp16, [1, 1280, 1, 1]> input_13_cast_fp16 = reshape(shape = var_431, x = attn_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
             tensor<int32, [2]> var_435 = const()[name = tensor<string, []>("op_435"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_437 = const()[name = tensor<string, []>("op_437"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_25_pad_type_0 = const()[name = tensor<string, []>("obj_25_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_25_pad_0 = const()[name = tensor<string, []>("obj_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(209364800)))];
             tensor<fp16, [1280]> layers_1_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212641664)))];
+            tensor<fp16, [1, 1280, 1, 1]> obj_25_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_bias_to_fp16, dilations = var_437, groups = var_277, pad = obj_25_pad_0, pad_type = obj_25_pad_type_0, strides = var_435, weight = layers_1_encoder_attn_o_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("obj_25_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_25_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> var_446 = const()[name = tensor<string, []>("op_446"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_11_cast_fp16 = reduce_mean(axes = var_446, keep_dims = var_278, x = inputs_11_cast_fp16)[name = tensor<string, []>("channels_mean_11_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_11_cast_fp16 = sub(x = inputs_11_cast_fp16, y = channels_mean_11_cast_fp16)[name = tensor<string, []>("zero_mean_11_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_sq_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = zero_mean_11_cast_fp16)[name = tensor<string, []>("zero_mean_sq_11_cast_fp16")];
+            tensor<int32, [1]> var_450 = const()[name = tensor<string, []>("op_450"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_451_cast_fp16 = reduce_mean(axes = var_450, keep_dims = var_278, x = zero_mean_sq_11_cast_fp16)[name = tensor<string, []>("op_451_cast_fp16")];
+            tensor<fp16, []> var_452_to_fp16 = const()[name = tensor<string, []>("op_452_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_453_cast_fp16 = add(x = var_451_cast_fp16, y = var_452_to_fp16)[name = tensor<string, []>("op_453_cast_fp16")];
             tensor<fp16, []> denom_11_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_11_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0_to_fp16, x = var_453_cast_fp16)[name = tensor<string, []>("denom_11_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> out_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = denom_11_cast_fp16)[name = tensor<string, []>("out_11_cast_fp16")];
             tensor<fp16, [1280]> input_15_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_15_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212644288)))];
             tensor<fp16, [1280]> input_15_beta_0_to_fp16 = const()[name = tensor<string, []>("input_15_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212646912)))];
             tensor<fp16, []> input_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
             tensor<fp16, [1, 1280, 1, 1]> input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<int32, [2]> var_464 = const()[name = tensor<string, []>("op_464"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_466 = const()[name = tensor<string, []>("op_466"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> input_17_pad_type_0 = const()[name = tensor<string, []>("input_17_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> input_17_pad_0 = const()[name = tensor<string, []>("input_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [5120, 1280, 1, 1]> layers_1_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212649536)))];
             tensor<fp16, [5120]> layers_1_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(225756800)))];
+            tensor<fp16, [1, 5120, 1, 1]> input_17_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = var_466, groups = var_277, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_464, weight = layers_1_fc1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
             tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
             tensor<fp16, [1, 5120, 1, 1]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_17_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<int32, [2]> var_472 = const()[name = tensor<string, []>("op_472"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_474 = const()[name = tensor<string, []>("op_474"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> hidden_states_5_pad_type_0 = const()[name = tensor<string, []>("hidden_states_5_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> hidden_states_5_pad_0 = const()[name = tensor<string, []>("hidden_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 5120, 1, 1]> layers_1_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(225767104)))];
             tensor<fp16, [1280]> layers_1_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238874368)))];
+            tensor<fp16, [1, 1280, 1, 1]> hidden_states_5_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = var_474, groups = var_277, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = var_472, weight = layers_1_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("hidden_states_5_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> inputs_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<bool, []> var_485 = const()[name = tensor<string, []>("op_485"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_489 = const()[name = tensor<string, []>("op_489"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_cast_fp16 = reduce_mean(axes = var_489, keep_dims = var_485, x = inputs_cast_fp16)[name = tensor<string, []>("channels_mean_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_cast_fp16 = sub(x = inputs_cast_fp16, y = channels_mean_cast_fp16)[name = tensor<string, []>("zero_mean_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_sq_cast_fp16 = mul(x = zero_mean_cast_fp16, y = zero_mean_cast_fp16)[name = tensor<string, []>("zero_mean_sq_cast_fp16")];
+            tensor<int32, [1]> var_493 = const()[name = tensor<string, []>("op_493"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_494_cast_fp16 = reduce_mean(axes = var_493, keep_dims = var_485, x = zero_mean_sq_cast_fp16)[name = tensor<string, []>("op_494_cast_fp16")];
+            tensor<fp16, []> var_495_to_fp16 = const()[name = tensor<string, []>("op_495_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_496_cast_fp16 = add(x = var_494_cast_fp16, y = var_495_to_fp16)[name = tensor<string, []>("op_496_cast_fp16")];
             tensor<fp16, []> denom_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_cast_fp16 = rsqrt(epsilon = denom_epsilon_0_to_fp16, x = var_496_cast_fp16)[name = tensor<string, []>("denom_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> out_cast_fp16 = mul(x = zero_mean_cast_fp16, y = denom_cast_fp16)[name = tensor<string, []>("out_cast_fp16")];
             tensor<fp16, [1280]> hidden_states_gamma_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238876992)))];
             tensor<fp16, [1280]> hidden_states_beta_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238879616)))];
             tensor<fp16, []> hidden_states_epsilon_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
             tensor<fp16, [1, 1280, 1, 1]> hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor<string, []>("hidden_states_cast_fp16")];
+            tensor<int32, [1]> var_506_axes_0 = const()[name = tensor<string, []>("op_506_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1280, 1]> var_506_cast_fp16 = squeeze(axes = var_506_axes_0, x = hidden_states_cast_fp16)[name = tensor<string, []>("op_506_cast_fp16")];
+            tensor<int32, [3]> var_509_perm_0 = const()[name = tensor<string, []>("op_509_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
             tensor<fp16, [51866]> linear_0_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_0_bias_0_to_fp16"), val = tensor<fp16, [51866]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238882240)))];
+            tensor<fp16, [1, 1, 1280]> transpose_0 = transpose(perm = var_509_perm_0, x = var_506_cast_fp16)[name = tensor<string, []>("transpose_0")];
             tensor<fp16, [1, 1, 51866]> logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = transpose_0)[name = tensor<string, []>("linear_0_cast_fp16")];
+            tensor<int32, []> var_513 = const()[name = tensor<string, []>("op_513"), val = tensor<int32, []>(1)];
+            tensor<bool, []> obj_31_interleave_0 = const()[name = tensor<string, []>("obj_31_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 2560, 1, 1]> key_cache_updates = concat(axis = var_513, interleave = obj_31_interleave_0, values = (current_key_1_cast_fp16, current_key_cast_fp16))[name = tensor<string, []>("obj_31_cast_fp16")];
+            tensor<int32, []> var_516 = const()[name = tensor<string, []>("op_516"), val = tensor<int32, []>(1)];
+            tensor<bool, []> obj_33_interleave_0 = const()[name = tensor<string, []>("obj_33_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 2560, 1, 1]> value_cache_updates = concat(axis = var_516, interleave = obj_33_interleave_0, values = (current_value_1_cast_fp16, current_value_cast_fp16))[name = tensor<string, []>("obj_33_cast_fp16")];
+            tensor<int32, [4]> var_527_begin_0 = const()[name = tensor<string, []>("op_527_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_527_end_0 = const()[name = tensor<string, []>("op_527_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_527_end_mask_0 = const()[name = tensor<string, []>("op_527_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_527_cast_fp16 = slice_by_index(begin = var_527_begin_0, end = var_527_end_0, end_mask = var_527_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_527_cast_fp16")];
+            tensor<int32, [4]> var_530_begin_0 = const()[name = tensor<string, []>("op_530_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_530_end_0 = const()[name = tensor<string, []>("op_530_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_530_end_mask_0 = const()[name = tensor<string, []>("op_530_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_530_squeeze_mask_0 = const()[name = tensor<string, []>("op_530_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_530_cast_fp16 = slice_by_index(begin = var_530_begin_0, end = var_530_end_0, end_mask = var_530_end_mask_0, squeeze_mask = var_530_squeeze_mask_0, x = var_527_cast_fp16)[name = tensor<string, []>("op_530_cast_fp16")];
+            tensor<int32, [4]> var_545_begin_0 = const()[name = tensor<string, []>("op_545_begin_0"), val = tensor<int32, [4]>([0, 1, 0, 0])];
+            tensor<int32, [4]> var_545_end_0 = const()[name = tensor<string, []>("op_545_end_0"), val = tensor<int32, [4]>([1, 2, 1, 1500])];
+            tensor<bool, [4]> var_545_end_mask_0 = const()[name = tensor<string, []>("op_545_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_545_cast_fp16 = slice_by_index(begin = var_545_begin_0, end = var_545_end_0, end_mask = var_545_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_545_cast_fp16")];
+            tensor<int32, [4]> var_548_begin_0 = const()[name = tensor<string, []>("op_548_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_548_end_0 = const()[name = tensor<string, []>("op_548_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_548_end_mask_0 = const()[name = tensor<string, []>("op_548_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_548_squeeze_mask_0 = const()[name = tensor<string, []>("op_548_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_548_cast_fp16 = slice_by_index(begin = var_548_begin_0, end = var_548_end_0, end_mask = var_548_end_mask_0, squeeze_mask = var_548_squeeze_mask_0, x = var_545_cast_fp16)[name = tensor<string, []>("op_548_cast_fp16")];
+            tensor<int32, [4]> var_563_begin_0 = const()[name = tensor<string, []>("op_563_begin_0"), val = tensor<int32, [4]>([0, 2, 0, 0])];
+            tensor<int32, [4]> var_563_end_0 = const()[name = tensor<string, []>("op_563_end_0"), val = tensor<int32, [4]>([1, 3, 1, 1500])];
+            tensor<bool, [4]> var_563_end_mask_0 = const()[name = tensor<string, []>("op_563_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_563_cast_fp16 = slice_by_index(begin = var_563_begin_0, end = var_563_end_0, end_mask = var_563_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_563_cast_fp16")];
+            tensor<int32, [4]> var_566_begin_0 = const()[name = tensor<string, []>("op_566_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_566_end_0 = const()[name = tensor<string, []>("op_566_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_566_end_mask_0 = const()[name = tensor<string, []>("op_566_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_566_squeeze_mask_0 = const()[name = tensor<string, []>("op_566_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, squeeze_mask = var_566_squeeze_mask_0, x = var_563_cast_fp16)[name = tensor<string, []>("op_566_cast_fp16")];
+            tensor<int32, [4]> var_581_begin_0 = const()[name = tensor<string, []>("op_581_begin_0"), val = tensor<int32, [4]>([0, 3, 0, 0])];
+            tensor<int32, [4]> var_581_end_0 = const()[name = tensor<string, []>("op_581_end_0"), val = tensor<int32, [4]>([1, 4, 1, 1500])];
+            tensor<bool, [4]> var_581_end_mask_0 = const()[name = tensor<string, []>("op_581_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_581_cast_fp16 = slice_by_index(begin = var_581_begin_0, end = var_581_end_0, end_mask = var_581_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_581_cast_fp16")];
+            tensor<int32, [4]> var_584_begin_0 = const()[name = tensor<string, []>("op_584_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_584_end_0 = const()[name = tensor<string, []>("op_584_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_584_end_mask_0 = const()[name = tensor<string, []>("op_584_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_584_squeeze_mask_0 = const()[name = tensor<string, []>("op_584_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_584_cast_fp16 = slice_by_index(begin = var_584_begin_0, end = var_584_end_0, end_mask = var_584_end_mask_0, squeeze_mask = var_584_squeeze_mask_0, x = var_581_cast_fp16)[name = tensor<string, []>("op_584_cast_fp16")];
+            tensor<int32, [4]> var_599_begin_0 = const()[name = tensor<string, []>("op_599_begin_0"), val = tensor<int32, [4]>([0, 4, 0, 0])];
+            tensor<int32, [4]> var_599_end_0 = const()[name = tensor<string, []>("op_599_end_0"), val = tensor<int32, [4]>([1, 5, 1, 1500])];
+            tensor<bool, [4]> var_599_end_mask_0 = const()[name = tensor<string, []>("op_599_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_599_cast_fp16 = slice_by_index(begin = var_599_begin_0, end = var_599_end_0, end_mask = var_599_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_599_cast_fp16")];
+            tensor<int32, [4]> var_602_begin_0 = const()[name = tensor<string, []>("op_602_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_602_end_0 = const()[name = tensor<string, []>("op_602_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_602_end_mask_0 = const()[name = tensor<string, []>("op_602_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_602_squeeze_mask_0 = const()[name = tensor<string, []>("op_602_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_602_cast_fp16 = slice_by_index(begin = var_602_begin_0, end = var_602_end_0, end_mask = var_602_end_mask_0, squeeze_mask = var_602_squeeze_mask_0, x = var_599_cast_fp16)[name = tensor<string, []>("op_602_cast_fp16")];
+            tensor<int32, [4]> var_617_begin_0 = const()[name = tensor<string, []>("op_617_begin_0"), val = tensor<int32, [4]>([0, 5, 0, 0])];
+            tensor<int32, [4]> var_617_end_0 = const()[name = tensor<string, []>("op_617_end_0"), val = tensor<int32, [4]>([1, 6, 1, 1500])];
+            tensor<bool, [4]> var_617_end_mask_0 = const()[name = tensor<string, []>("op_617_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_617_cast_fp16 = slice_by_index(begin = var_617_begin_0, end = var_617_end_0, end_mask = var_617_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_617_cast_fp16")];
+            tensor<int32, [4]> var_620_begin_0 = const()[name = tensor<string, []>("op_620_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_620_end_0 = const()[name = tensor<string, []>("op_620_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_620_end_mask_0 = const()[name = tensor<string, []>("op_620_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_620_squeeze_mask_0 = const()[name = tensor<string, []>("op_620_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_620_cast_fp16 = slice_by_index(begin = var_620_begin_0, end = var_620_end_0, end_mask = var_620_end_mask_0, squeeze_mask = var_620_squeeze_mask_0, x = var_617_cast_fp16)[name = tensor<string, []>("op_620_cast_fp16")];
+            tensor<int32, [4]> var_635_begin_0 = const()[name = tensor<string, []>("op_635_begin_0"), val = tensor<int32, [4]>([0, 6, 0, 0])];
+            tensor<int32, [4]> var_635_end_0 = const()[name = tensor<string, []>("op_635_end_0"), val = tensor<int32, [4]>([1, 7, 1, 1500])];
+            tensor<bool, [4]> var_635_end_mask_0 = const()[name = tensor<string, []>("op_635_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_635_cast_fp16 = slice_by_index(begin = var_635_begin_0, end = var_635_end_0, end_mask = var_635_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_635_cast_fp16")];
+            tensor<int32, [4]> var_638_begin_0 = const()[name = tensor<string, []>("op_638_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_638_end_0 = const()[name = tensor<string, []>("op_638_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_638_end_mask_0 = const()[name = tensor<string, []>("op_638_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_638_squeeze_mask_0 = const()[name = tensor<string, []>("op_638_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_638_cast_fp16 = slice_by_index(begin = var_638_begin_0, end = var_638_end_0, end_mask = var_638_end_mask_0, squeeze_mask = var_638_squeeze_mask_0, x = var_635_cast_fp16)[name = tensor<string, []>("op_638_cast_fp16")];
+            tensor<int32, [4]> var_653_begin_0 = const()[name = tensor<string, []>("op_653_begin_0"), val = tensor<int32, [4]>([0, 7, 0, 0])];
+            tensor<int32, [4]> var_653_end_0 = const()[name = tensor<string, []>("op_653_end_0"), val = tensor<int32, [4]>([1, 8, 1, 1500])];
+            tensor<bool, [4]> var_653_end_mask_0 = const()[name = tensor<string, []>("op_653_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_653_cast_fp16 = slice_by_index(begin = var_653_begin_0, end = var_653_end_0, end_mask = var_653_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_653_cast_fp16")];
+            tensor<int32, [4]> var_656_begin_0 = const()[name = tensor<string, []>("op_656_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_656_end_0 = const()[name = tensor<string, []>("op_656_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_656_end_mask_0 = const()[name = tensor<string, []>("op_656_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_656_squeeze_mask_0 = const()[name = tensor<string, []>("op_656_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_656_cast_fp16 = slice_by_index(begin = var_656_begin_0, end = var_656_end_0, end_mask = var_656_end_mask_0, squeeze_mask = var_656_squeeze_mask_0, x = var_653_cast_fp16)[name = tensor<string, []>("op_656_cast_fp16")];
+            tensor<int32, [4]> var_671_begin_0 = const()[name = tensor<string, []>("op_671_begin_0"), val = tensor<int32, [4]>([0, 8, 0, 0])];
+            tensor<int32, [4]> var_671_end_0 = const()[name = tensor<string, []>("op_671_end_0"), val = tensor<int32, [4]>([1, 9, 1, 1500])];
+            tensor<bool, [4]> var_671_end_mask_0 = const()[name = tensor<string, []>("op_671_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_671_cast_fp16 = slice_by_index(begin = var_671_begin_0, end = var_671_end_0, end_mask = var_671_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_671_cast_fp16")];
+            tensor<int32, [4]> var_674_begin_0 = const()[name = tensor<string, []>("op_674_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_674_end_0 = const()[name = tensor<string, []>("op_674_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_674_end_mask_0 = const()[name = tensor<string, []>("op_674_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_674_squeeze_mask_0 = const()[name = tensor<string, []>("op_674_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_674_cast_fp16 = slice_by_index(begin = var_674_begin_0, end = var_674_end_0, end_mask = var_674_end_mask_0, squeeze_mask = var_674_squeeze_mask_0, x = var_671_cast_fp16)[name = tensor<string, []>("op_674_cast_fp16")];
+            tensor<int32, [4]> var_689_begin_0 = const()[name = tensor<string, []>("op_689_begin_0"), val = tensor<int32, [4]>([0, 9, 0, 0])];
+            tensor<int32, [4]> var_689_end_0 = const()[name = tensor<string, []>("op_689_end_0"), val = tensor<int32, [4]>([1, 10, 1, 1500])];
+            tensor<bool, [4]> var_689_end_mask_0 = const()[name = tensor<string, []>("op_689_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_689_cast_fp16 = slice_by_index(begin = var_689_begin_0, end = var_689_end_0, end_mask = var_689_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_689_cast_fp16")];
+            tensor<int32, [4]> var_692_begin_0 = const()[name = tensor<string, []>("op_692_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_692_end_0 = const()[name = tensor<string, []>("op_692_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_692_end_mask_0 = const()[name = tensor<string, []>("op_692_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_692_squeeze_mask_0 = const()[name = tensor<string, []>("op_692_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_692_cast_fp16 = slice_by_index(begin = var_692_begin_0, end = var_692_end_0, end_mask = var_692_end_mask_0, squeeze_mask = var_692_squeeze_mask_0, x = var_689_cast_fp16)[name = tensor<string, []>("op_692_cast_fp16")];
+            tensor<int32, [4]> var_707_begin_0 = const()[name = tensor<string, []>("op_707_begin_0"), val = tensor<int32, [4]>([0, 10, 0, 0])];
+            tensor<int32, [4]> var_707_end_0 = const()[name = tensor<string, []>("op_707_end_0"), val = tensor<int32, [4]>([1, 11, 1, 1500])];
+            tensor<bool, [4]> var_707_end_mask_0 = const()[name = tensor<string, []>("op_707_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_707_cast_fp16 = slice_by_index(begin = var_707_begin_0, end = var_707_end_0, end_mask = var_707_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_707_cast_fp16")];
+            tensor<int32, [4]> var_710_begin_0 = const()[name = tensor<string, []>("op_710_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_710_end_0 = const()[name = tensor<string, []>("op_710_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_710_end_mask_0 = const()[name = tensor<string, []>("op_710_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_710_squeeze_mask_0 = const()[name = tensor<string, []>("op_710_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_710_cast_fp16 = slice_by_index(begin = var_710_begin_0, end = var_710_end_0, end_mask = var_710_end_mask_0, squeeze_mask = var_710_squeeze_mask_0, x = var_707_cast_fp16)[name = tensor<string, []>("op_710_cast_fp16")];
+            tensor<int32, [4]> var_725_begin_0 = const()[name = tensor<string, []>("op_725_begin_0"), val = tensor<int32, [4]>([0, 11, 0, 0])];
+            tensor<int32, [4]> var_725_end_0 = const()[name = tensor<string, []>("op_725_end_0"), val = tensor<int32, [4]>([1, 12, 1, 1500])];
+            tensor<bool, [4]> var_725_end_mask_0 = const()[name = tensor<string, []>("op_725_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_725_cast_fp16 = slice_by_index(begin = var_725_begin_0, end = var_725_end_0, end_mask = var_725_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_725_cast_fp16")];
+            tensor<int32, [4]> var_728_begin_0 = const()[name = tensor<string, []>("op_728_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_728_end_0 = const()[name = tensor<string, []>("op_728_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_728_end_mask_0 = const()[name = tensor<string, []>("op_728_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_728_squeeze_mask_0 = const()[name = tensor<string, []>("op_728_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_728_cast_fp16 = slice_by_index(begin = var_728_begin_0, end = var_728_end_0, end_mask = var_728_end_mask_0, squeeze_mask = var_728_squeeze_mask_0, x = var_725_cast_fp16)[name = tensor<string, []>("op_728_cast_fp16")];
+            tensor<int32, [4]> var_743_begin_0 = const()[name = tensor<string, []>("op_743_begin_0"), val = tensor<int32, [4]>([0, 12, 0, 0])];
+            tensor<int32, [4]> var_743_end_0 = const()[name = tensor<string, []>("op_743_end_0"), val = tensor<int32, [4]>([1, 13, 1, 1500])];
+            tensor<bool, [4]> var_743_end_mask_0 = const()[name = tensor<string, []>("op_743_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_743_cast_fp16 = slice_by_index(begin = var_743_begin_0, end = var_743_end_0, end_mask = var_743_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_743_cast_fp16")];
+            tensor<int32, [4]> var_746_begin_0 = const()[name = tensor<string, []>("op_746_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_746_end_0 = const()[name = tensor<string, []>("op_746_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_746_end_mask_0 = const()[name = tensor<string, []>("op_746_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_746_squeeze_mask_0 = const()[name = tensor<string, []>("op_746_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_746_cast_fp16 = slice_by_index(begin = var_746_begin_0, end = var_746_end_0, end_mask = var_746_end_mask_0, squeeze_mask = var_746_squeeze_mask_0, x = var_743_cast_fp16)[name = tensor<string, []>("op_746_cast_fp16")];
+            tensor<int32, [4]> var_761_begin_0 = const()[name = tensor<string, []>("op_761_begin_0"), val = tensor<int32, [4]>([0, 13, 0, 0])];
+            tensor<int32, [4]> var_761_end_0 = const()[name = tensor<string, []>("op_761_end_0"), val = tensor<int32, [4]>([1, 14, 1, 1500])];
+            tensor<bool, [4]> var_761_end_mask_0 = const()[name = tensor<string, []>("op_761_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_761_cast_fp16 = slice_by_index(begin = var_761_begin_0, end = var_761_end_0, end_mask = var_761_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_761_cast_fp16")];
+            tensor<int32, [4]> var_764_begin_0 = const()[name = tensor<string, []>("op_764_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_764_end_0 = const()[name = tensor<string, []>("op_764_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_764_end_mask_0 = const()[name = tensor<string, []>("op_764_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_764_squeeze_mask_0 = const()[name = tensor<string, []>("op_764_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_764_cast_fp16 = slice_by_index(begin = var_764_begin_0, end = var_764_end_0, end_mask = var_764_end_mask_0, squeeze_mask = var_764_squeeze_mask_0, x = var_761_cast_fp16)[name = tensor<string, []>("op_764_cast_fp16")];
+            tensor<int32, [4]> var_779_begin_0 = const()[name = tensor<string, []>("op_779_begin_0"), val = tensor<int32, [4]>([0, 14, 0, 0])];
+            tensor<int32, [4]> var_779_end_0 = const()[name = tensor<string, []>("op_779_end_0"), val = tensor<int32, [4]>([1, 15, 1, 1500])];
+            tensor<bool, [4]> var_779_end_mask_0 = const()[name = tensor<string, []>("op_779_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_779_cast_fp16 = slice_by_index(begin = var_779_begin_0, end = var_779_end_0, end_mask = var_779_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_779_cast_fp16")];
+            tensor<int32, [4]> var_782_begin_0 = const()[name = tensor<string, []>("op_782_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_782_end_0 = const()[name = tensor<string, []>("op_782_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_782_end_mask_0 = const()[name = tensor<string, []>("op_782_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_782_squeeze_mask_0 = const()[name = tensor<string, []>("op_782_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_782_cast_fp16 = slice_by_index(begin = var_782_begin_0, end = var_782_end_0, end_mask = var_782_end_mask_0, squeeze_mask = var_782_squeeze_mask_0, x = var_779_cast_fp16)[name = tensor<string, []>("op_782_cast_fp16")];
+            tensor<int32, [4]> var_797_begin_0 = const()[name = tensor<string, []>("op_797_begin_0"), val = tensor<int32, [4]>([0, 15, 0, 0])];
+            tensor<int32, [4]> var_797_end_0 = const()[name = tensor<string, []>("op_797_end_0"), val = tensor<int32, [4]>([1, 16, 1, 1500])];
+            tensor<bool, [4]> var_797_end_mask_0 = const()[name = tensor<string, []>("op_797_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_797_cast_fp16 = slice_by_index(begin = var_797_begin_0, end = var_797_end_0, end_mask = var_797_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_797_cast_fp16")];
+            tensor<int32, [4]> var_800_begin_0 = const()[name = tensor<string, []>("op_800_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_800_end_0 = const()[name = tensor<string, []>("op_800_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_800_end_mask_0 = const()[name = tensor<string, []>("op_800_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_800_squeeze_mask_0 = const()[name = tensor<string, []>("op_800_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_800_cast_fp16 = slice_by_index(begin = var_800_begin_0, end = var_800_end_0, end_mask = var_800_end_mask_0, squeeze_mask = var_800_squeeze_mask_0, x = var_797_cast_fp16)[name = tensor<string, []>("op_800_cast_fp16")];
+            tensor<int32, [4]> var_815_begin_0 = const()[name = tensor<string, []>("op_815_begin_0"), val = tensor<int32, [4]>([0, 16, 0, 0])];
+            tensor<int32, [4]> var_815_end_0 = const()[name = tensor<string, []>("op_815_end_0"), val = tensor<int32, [4]>([1, 17, 1, 1500])];
+            tensor<bool, [4]> var_815_end_mask_0 = const()[name = tensor<string, []>("op_815_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_815_cast_fp16 = slice_by_index(begin = var_815_begin_0, end = var_815_end_0, end_mask = var_815_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_815_cast_fp16")];
+            tensor<int32, [4]> var_818_begin_0 = const()[name = tensor<string, []>("op_818_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_818_end_0 = const()[name = tensor<string, []>("op_818_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_818_end_mask_0 = const()[name = tensor<string, []>("op_818_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_818_squeeze_mask_0 = const()[name = tensor<string, []>("op_818_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_818_cast_fp16 = slice_by_index(begin = var_818_begin_0, end = var_818_end_0, end_mask = var_818_end_mask_0, squeeze_mask = var_818_squeeze_mask_0, x = var_815_cast_fp16)[name = tensor<string, []>("op_818_cast_fp16")];
+            tensor<int32, [4]> var_833_begin_0 = const()[name = tensor<string, []>("op_833_begin_0"), val = tensor<int32, [4]>([0, 17, 0, 0])];
+            tensor<int32, [4]> var_833_end_0 = const()[name = tensor<string, []>("op_833_end_0"), val = tensor<int32, [4]>([1, 18, 1, 1500])];
+            tensor<bool, [4]> var_833_end_mask_0 = const()[name = tensor<string, []>("op_833_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_833_cast_fp16 = slice_by_index(begin = var_833_begin_0, end = var_833_end_0, end_mask = var_833_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_833_cast_fp16")];
+            tensor<int32, [4]> var_836_begin_0 = const()[name = tensor<string, []>("op_836_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_836_end_0 = const()[name = tensor<string, []>("op_836_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_836_end_mask_0 = const()[name = tensor<string, []>("op_836_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_836_squeeze_mask_0 = const()[name = tensor<string, []>("op_836_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_836_cast_fp16 = slice_by_index(begin = var_836_begin_0, end = var_836_end_0, end_mask = var_836_end_mask_0, squeeze_mask = var_836_squeeze_mask_0, x = var_833_cast_fp16)[name = tensor<string, []>("op_836_cast_fp16")];
+            tensor<int32, [4]> var_851_begin_0 = const()[name = tensor<string, []>("op_851_begin_0"), val = tensor<int32, [4]>([0, 18, 0, 0])];
+            tensor<int32, [4]> var_851_end_0 = const()[name = tensor<string, []>("op_851_end_0"), val = tensor<int32, [4]>([1, 19, 1, 1500])];
+            tensor<bool, [4]> var_851_end_mask_0 = const()[name = tensor<string, []>("op_851_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_851_cast_fp16 = slice_by_index(begin = var_851_begin_0, end = var_851_end_0, end_mask = var_851_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_851_cast_fp16")];
+            tensor<int32, [4]> var_854_begin_0 = const()[name = tensor<string, []>("op_854_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_854_end_0 = const()[name = tensor<string, []>("op_854_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_854_end_mask_0 = const()[name = tensor<string, []>("op_854_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_854_squeeze_mask_0 = const()[name = tensor<string, []>("op_854_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_854_cast_fp16 = slice_by_index(begin = var_854_begin_0, end = var_854_end_0, end_mask = var_854_end_mask_0, squeeze_mask = var_854_squeeze_mask_0, x = var_851_cast_fp16)[name = tensor<string, []>("op_854_cast_fp16")];
+            tensor<int32, [4]> var_869_begin_0 = const()[name = tensor<string, []>("op_869_begin_0"), val = tensor<int32, [4]>([0, 19, 0, 0])];
+            tensor<int32, [4]> var_869_end_0 = const()[name = tensor<string, []>("op_869_end_0"), val = tensor<int32, [4]>([1, 20, 1, 1500])];
+            tensor<bool, [4]> var_869_end_mask_0 = const()[name = tensor<string, []>("op_869_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_869_cast_fp16 = slice_by_index(begin = var_869_begin_0, end = var_869_end_0, end_mask = var_869_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_869_cast_fp16")];
+            tensor<int32, [4]> var_872_begin_0 = const()[name = tensor<string, []>("op_872_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_872_end_0 = const()[name = tensor<string, []>("op_872_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_872_end_mask_0 = const()[name = tensor<string, []>("op_872_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_872_squeeze_mask_0 = const()[name = tensor<string, []>("op_872_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_872_cast_fp16 = slice_by_index(begin = var_872_begin_0, end = var_872_end_0, end_mask = var_872_end_mask_0, squeeze_mask = var_872_squeeze_mask_0, x = var_869_cast_fp16)[name = tensor<string, []>("op_872_cast_fp16")];
+            tensor<int32, []> var_879 = const()[name = tensor<string, []>("op_879"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_880_interleave_0 = const()[name = tensor<string, []>("op_880_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 20, 1500]> var_880_cast_fp16 = concat(axis = var_879, interleave = var_880_interleave_0, values = (var_530_cast_fp16, var_548_cast_fp16, var_566_cast_fp16, var_584_cast_fp16, var_602_cast_fp16, var_620_cast_fp16, var_638_cast_fp16, var_656_cast_fp16, var_674_cast_fp16, var_692_cast_fp16, var_710_cast_fp16, var_728_cast_fp16, var_746_cast_fp16, var_764_cast_fp16, var_782_cast_fp16, var_800_cast_fp16, var_818_cast_fp16, var_836_cast_fp16, var_854_cast_fp16, var_872_cast_fp16))[name = tensor<string, []>("op_880_cast_fp16")];
+            tensor<int32, [1]> var_882 = const()[name = tensor<string, []>("op_882"), val = tensor<int32, [1]>([1])];
+            tensor<bool, []> var_883 = const()[name = tensor<string, []>("op_883"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1500]> alignment_heads_weights = reduce_mean(axes = var_882, keep_dims = var_883, x = var_880_cast_fp16)[name = tensor<string, []>("obj_cast_fp16")];
+        } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights);
 }

distil-whisper_distil-large-v3_turbo/TextDecoder.mlmodelc/weights/weight.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7415eed816b4f2f69ce64ece61328f9f60f96e2201a45cb01caca6d413cc6e94
 size 238986036

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e8e3906c4f1717fe848f6d45e6e052fc3d6048ec24d3a34e23e1c972b10bc84
 size 238986036

distil-whisper_distil-large-v3_turbo_600MB/TextDecoder.mlmodelc/analytics/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:394520c761dbfb69b05c7c2d49380bbece53e92f6bd19ffabcd46f6aaa2193ad
 size 243

 version https://git-lfs.github.com/spec/v1
+oid sha256:013f68d396d51f281bef4db5e0c3b8eb8df147f657830c8df34589875848663d
 size 243

distil-whisper_distil-large-v3_turbo_600MB/TextDecoder.mlmodelc/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9b45ba978c8fcc372a7b52150f83b3ddbe3376ec044e3439e9e227d0076950f
-size 593

 version https://git-lfs.github.com/spec/v1
+oid sha256:db10bd19b00bccc9b7d2a5a8317b6d73be16c01f78b3b3da47040ae604347b11
+size 633

distil-whisper_distil-large-v3_turbo_600MB/TextDecoder.mlmodelc/metadata.json CHANGED Viewed

@@ -32,6 +32,16 @@
         "shape" : "[1, 2560, 1, 1]",
         "name" : "value_cache_updates",
         "type" : "MultiArray"
       }
     ],
     "modelParameters" : [
@@ -40,10 +50,11 @@
     "specificationVersion" : 7,
     "mlProgramOperationTypeHistogram" : {
       "Split" : 2,
-      "Concat" : 2,
       "Ios16.rsqrt" : 7,
       "Ios16.mul" : 26,
       "Squeeze" : 1,
       "Ios16.sub" : 8,
       "Transpose" : 1,
       "Ios16.conv" : 20,
@@ -51,7 +62,7 @@
       "Ios16.linear" : 1,
       "Ios16.matmul" : 8,
       "Ios16.gelu" : 2,
-      "Ios16.reduceMean" : 14,
       "ExpandDims" : 6,
       "Ios16.batchNorm" : 7,
       "Ios16.gather" : 2,

         "shape" : "[1, 2560, 1, 1]",
         "name" : "value_cache_updates",
         "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1500)",
+        "shortDescription" : "",
+        "shape" : "[1, 1500]",
+        "name" : "alignment_heads_weights",
+        "type" : "MultiArray"
       }
     ],
     "modelParameters" : [
     "specificationVersion" : 7,
     "mlProgramOperationTypeHistogram" : {
       "Split" : 2,
+      "Concat" : 3,
       "Ios16.rsqrt" : 7,
       "Ios16.mul" : 26,
       "Squeeze" : 1,
+      "SliceByIndex" : 40,
       "Ios16.sub" : 8,
       "Transpose" : 1,
       "Ios16.conv" : 20,
       "Ios16.linear" : 1,
       "Ios16.matmul" : 8,
       "Ios16.gelu" : 2,
+      "Ios16.reduceMean" : 15,
       "ExpandDims" : 6,
       "Ios16.batchNorm" : 7,
       "Ios16.gather" : 2,

distil-whisper_distil-large-v3_turbo_600MB/TextDecoder.mlmodelc/model.mil CHANGED Viewed

@@ -147,12 +147,12 @@ program(1.0)
             tensor<bool, []> mh_w_5_transpose_x_0 = const()[name = tensor<string, []>("mh_w_5_transpose_x_0"), val = tensor<bool, []>(true)];
             tensor<bool, []> mh_w_5_transpose_y_0 = const()[name = tensor<string, []>("mh_w_5_transpose_y_0"), val = tensor<bool, []>(false)];
             tensor<fp16, [1, 20, 1, 1500]> mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_207_cast_fp16, y = var_209_cast_fp16)[name = tensor<string, []>("mh_w_5_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 1500]> var_212_cast_fp16 = softmax(axis = var_56, x = mh_w_5_cast_fp16)[name = tensor<string, []>("op_212_cast_fp16")];
             tensor<int32, [4]> var_213 = const()[name = tensor<string, []>("op_213"), val = tensor<int32, [4]>([1, 20, 64, -1])];
             tensor<fp16, [1, 20, 64, 1500]> var_214_cast_fp16 = reshape(shape = var_213, x = value_3_cast_fp16)[name = tensor<string, []>("op_214_cast_fp16")];
             tensor<bool, []> attn_3_transpose_x_0 = const()[name = tensor<string, []>("attn_3_transpose_x_0"), val = tensor<bool, []>(false)];
             tensor<bool, []> attn_3_transpose_y_0 = const()[name = tensor<string, []>("attn_3_transpose_y_0"), val = tensor<bool, []>(true)];
-            tensor<fp16, [1, 20, 64, 1]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_214_cast_fp16, y = var_212_cast_fp16)[name = tensor<string, []>("attn_3_cast_fp16")];
             tensor<int32, [4]> var_217 = const()[name = tensor<string, []>("op_217"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
             tensor<fp16, [1, 1280, 1, 1]> input_3_cast_fp16 = reshape(shape = var_217, x = attn_3_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
             tensor<int32, [2]> var_221 = const()[name = tensor<string, []>("op_221"), val = tensor<int32, [2]>([1, 1])];
@@ -209,30 +209,30 @@ program(1.0)
             tensor<fp16, []> denom_7_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_7_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
             tensor<fp16, [1, 1, 1, 1]> denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0_to_fp16, x = var_297_cast_fp16)[name = tensor<string, []>("denom_7_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> out_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = denom_7_cast_fp16)[name = tensor<string, []>("out_7_cast_fp16")];
-            tensor<fp16, [1280]> obj_13_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_13_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186403136)))];
-            tensor<fp16, [1280]> obj_13_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_13_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186405760)))];
-            tensor<fp16, []> obj_13_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_13_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor<string, []>("obj_13_cast_fp16")];
             tensor<int32, [2]> var_312 = const()[name = tensor<string, []>("op_312"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_314 = const()[name = tensor<string, []>("op_314"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> query_5_pad_type_0 = const()[name = tensor<string, []>("query_5_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> query_5_pad_0 = const()[name = tensor<string, []>("query_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186408384)))];
             tensor<fp16, [1280]> layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(189685248)))];
-            tensor<fp16, [1, 1280, 1, 1]> query_5_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = var_314, groups = var_277, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = var_312, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("query_5_cast_fp16")];
             tensor<int32, [2]> var_318 = const()[name = tensor<string, []>("op_318"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_320 = const()[name = tensor<string, []>("op_320"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> current_key_pad_type_0 = const()[name = tensor<string, []>("current_key_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> current_key_pad_0 = const()[name = tensor<string, []>("current_key_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(189687872)))];
-            tensor<fp16, [1, 1280, 1, 1]> current_key_cast_fp16 = conv(dilations = var_320, groups = var_277, pad = current_key_pad_0, pad_type = current_key_pad_type_0, strides = var_318, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("current_key_cast_fp16")];
             tensor<int32, [2]> var_325 = const()[name = tensor<string, []>("op_325"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_327 = const()[name = tensor<string, []>("op_327"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> current_value_pad_type_0 = const()[name = tensor<string, []>("current_value_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> current_value_pad_0 = const()[name = tensor<string, []>("current_value_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(192964736)))];
             tensor<fp16, [1280]> layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(196241600)))];
-            tensor<fp16, [1, 1280, 1, 1]> current_value_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = var_327, groups = var_277, pad = current_value_pad_0, pad_type = current_value_pad_type_0, strides = var_325, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor<string, []>("current_value_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 224]> var_334_cast_fp16 = mul(x = current_key_cast_fp16, y = var_118_cast_fp16)[name = tensor<string, []>("op_334_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 224]> var_336_cast_fp16 = mul(x = var_43_cast_fp16_1, y = var_121_cast_fp16)[name = tensor<string, []>("op_336_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 224]> key_5_cast_fp16 = add(x = var_334_cast_fp16, y = var_336_cast_fp16)[name = tensor<string, []>("key_5_cast_fp16")];
@@ -259,12 +259,12 @@ program(1.0)
             tensor<fp16, [1, 1280, 1, 1]> input_11_cast_fp16 = reshape(shape = var_361, x = attn_5_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
             tensor<int32, [2]> var_365 = const()[name = tensor<string, []>("op_365"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_367 = const()[name = tensor<string, []>("op_367"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> obj_19_pad_type_0 = const()[name = tensor<string, []>("obj_19_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> obj_19_pad_0 = const()[name = tensor<string, []>("obj_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(196244224)))];
             tensor<fp16, [1280]> layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199521088)))];
-            tensor<fp16, [1, 1280, 1, 1]> obj_19_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = var_367, groups = var_277, pad = obj_19_pad_0, pad_type = obj_19_pad_type_0, strides = var_365, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("obj_19_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_19_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
             tensor<int32, [1]> var_377 = const()[name = tensor<string, []>("op_377"), val = tensor<int32, [1]>([1])];
             tensor<fp16, [1, 1, 1, 1]> channels_mean_9_cast_fp16 = reduce_mean(axes = var_377, keep_dims = var_278, x = inputs_9_cast_fp16)[name = tensor<string, []>("channels_mean_9_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_9_cast_fp16 = sub(x = inputs_9_cast_fp16, y = channels_mean_9_cast_fp16)[name = tensor<string, []>("zero_mean_9_cast_fp16")];
@@ -276,17 +276,17 @@ program(1.0)
             tensor<fp16, []> denom_9_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_9_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
             tensor<fp16, [1, 1, 1, 1]> denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0_to_fp16, x = var_384_cast_fp16)[name = tensor<string, []>("denom_9_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> out_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = denom_9_cast_fp16)[name = tensor<string, []>("out_9_cast_fp16")];
-            tensor<fp16, [1280]> obj_21_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_21_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199523712)))];
-            tensor<fp16, [1280]> obj_21_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_21_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199526336)))];
-            tensor<fp16, []> obj_21_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_21_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 1280, 1, 1]> obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor<string, []>("obj_21_cast_fp16")];
             tensor<int32, [2]> var_399 = const()[name = tensor<string, []>("op_399"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_401 = const()[name = tensor<string, []>("op_401"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> query_pad_type_0 = const()[name = tensor<string, []>("query_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> query_pad_0 = const()[name = tensor<string, []>("query_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199528960)))];
             tensor<fp16, [1280]> layers_1_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202805824)))];
-            tensor<fp16, [1, 1280, 1, 1]> query_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_bias_to_fp16, dilations = var_401, groups = var_277, pad = query_pad_0, pad_type = query_pad_type_0, strides = var_399, weight = layers_1_encoder_attn_q_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor<string, []>("query_cast_fp16")];
             tensor<int32, [2]> var_405 = const()[name = tensor<string, []>("op_405"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_407 = const()[name = tensor<string, []>("op_407"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> key_pad_type_0 = const()[name = tensor<string, []>("key_pad_type_0"), val = tensor<string, []>("custom")];
@@ -309,81 +309,267 @@ program(1.0)
             tensor<bool, []> mh_w_transpose_x_0 = const()[name = tensor<string, []>("mh_w_transpose_x_0"), val = tensor<bool, []>(true)];
             tensor<bool, []> mh_w_transpose_y_0 = const()[name = tensor<string, []>("mh_w_transpose_y_0"), val = tensor<bool, []>(false)];
             tensor<fp16, [1, 20, 1, 1500]> mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_421_cast_fp16, y = var_423_cast_fp16)[name = tensor<string, []>("mh_w_cast_fp16")];
-            tensor<fp16, [1, 20, 1, 1500]> var_426_cast_fp16 = softmax(axis = var_270, x = mh_w_cast_fp16)[name = tensor<string, []>("op_426_cast_fp16")];
             tensor<int32, [4]> var_427 = const()[name = tensor<string, []>("op_427"), val = tensor<int32, [4]>([1, 20, 64, -1])];
             tensor<fp16, [1, 20, 64, 1500]> var_428_cast_fp16 = reshape(shape = var_427, x = value_cast_fp16)[name = tensor<string, []>("op_428_cast_fp16")];
             tensor<bool, []> attn_transpose_x_0 = const()[name = tensor<string, []>("attn_transpose_x_0"), val = tensor<bool, []>(false)];
             tensor<bool, []> attn_transpose_y_0 = const()[name = tensor<string, []>("attn_transpose_y_0"), val = tensor<bool, []>(true)];
-            tensor<fp16, [1, 20, 64, 1]> attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_428_cast_fp16, y = var_426_cast_fp16)[name = tensor<string, []>("attn_cast_fp16")];
             tensor<int32, [4]> var_431 = const()[name = tensor<string, []>("op_431"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
             tensor<fp16, [1, 1280, 1, 1]> input_13_cast_fp16 = reshape(shape = var_431, x = attn_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
             tensor<int32, [2]> var_435 = const()[name = tensor<string, []>("op_435"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_437 = const()[name = tensor<string, []>("op_437"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> obj_23_pad_type_0 = const()[name = tensor<string, []>("obj_23_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> obj_23_pad_0 = const()[name = tensor<string, []>("obj_23_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(209364800)))];
             tensor<fp16, [1280]> layers_1_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212641664)))];
-            tensor<fp16, [1, 1280, 1, 1]> obj_23_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_bias_to_fp16, dilations = var_437, groups = var_277, pad = obj_23_pad_0, pad_type = obj_23_pad_type_0, strides = var_435, weight = layers_1_encoder_attn_o_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("obj_23_cast_fp16")];
-            tensor<fp16, [1, 1280, 1, 1]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_23_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
-            tensor<int32, [1]> var_443 = const()[name = tensor<string, []>("op_443"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 1]> channels_mean_11_cast_fp16 = reduce_mean(axes = var_443, keep_dims = var_278, x = inputs_11_cast_fp16)[name = tensor<string, []>("channels_mean_11_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_11_cast_fp16 = sub(x = inputs_11_cast_fp16, y = channels_mean_11_cast_fp16)[name = tensor<string, []>("zero_mean_11_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_sq_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = zero_mean_11_cast_fp16)[name = tensor<string, []>("zero_mean_sq_11_cast_fp16")];
-            tensor<int32, [1]> var_447 = const()[name = tensor<string, []>("op_447"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 1]> var_448_cast_fp16 = reduce_mean(axes = var_447, keep_dims = var_278, x = zero_mean_sq_11_cast_fp16)[name = tensor<string, []>("op_448_cast_fp16")];
-            tensor<fp16, []> var_449_to_fp16 = const()[name = tensor<string, []>("op_449_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 1, 1, 1]> var_450_cast_fp16 = add(x = var_448_cast_fp16, y = var_449_to_fp16)[name = tensor<string, []>("op_450_cast_fp16")];
             tensor<fp16, []> denom_11_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_11_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 1]> denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0_to_fp16, x = var_450_cast_fp16)[name = tensor<string, []>("denom_11_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> out_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = denom_11_cast_fp16)[name = tensor<string, []>("out_11_cast_fp16")];
             tensor<fp16, [1280]> input_15_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_15_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212644288)))];
             tensor<fp16, [1280]> input_15_beta_0_to_fp16 = const()[name = tensor<string, []>("input_15_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212646912)))];
             tensor<fp16, []> input_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
             tensor<fp16, [1, 1280, 1, 1]> input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
-            tensor<int32, [2]> var_461 = const()[name = tensor<string, []>("op_461"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_463 = const()[name = tensor<string, []>("op_463"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> input_17_pad_type_0 = const()[name = tensor<string, []>("input_17_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> input_17_pad_0 = const()[name = tensor<string, []>("input_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [5120, 1280, 1, 1]> layers_1_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212649536)))];
             tensor<fp16, [5120]> layers_1_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(225756800)))];
-            tensor<fp16, [1, 5120, 1, 1]> input_17_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = var_463, groups = var_277, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_461, weight = layers_1_fc1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
             tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
             tensor<fp16, [1, 5120, 1, 1]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_17_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
-            tensor<int32, [2]> var_469 = const()[name = tensor<string, []>("op_469"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_471 = const()[name = tensor<string, []>("op_471"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> hidden_states_5_pad_type_0 = const()[name = tensor<string, []>("hidden_states_5_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> hidden_states_5_pad_0 = const()[name = tensor<string, []>("hidden_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 5120, 1, 1]> layers_1_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(225767104)))];
             tensor<fp16, [1280]> layers_1_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238874368)))];
-            tensor<fp16, [1, 1280, 1, 1]> hidden_states_5_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = var_471, groups = var_277, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = var_469, weight = layers_1_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("hidden_states_5_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> inputs_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
-            tensor<bool, []> var_481 = const()[name = tensor<string, []>("op_481"), val = tensor<bool, []>(true)];
-            tensor<int32, [1]> var_485 = const()[name = tensor<string, []>("op_485"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 1]> channels_mean_cast_fp16 = reduce_mean(axes = var_485, keep_dims = var_481, x = inputs_cast_fp16)[name = tensor<string, []>("channels_mean_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_cast_fp16 = sub(x = inputs_cast_fp16, y = channels_mean_cast_fp16)[name = tensor<string, []>("zero_mean_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_sq_cast_fp16 = mul(x = zero_mean_cast_fp16, y = zero_mean_cast_fp16)[name = tensor<string, []>("zero_mean_sq_cast_fp16")];
-            tensor<int32, [1]> var_489 = const()[name = tensor<string, []>("op_489"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 1]> var_490_cast_fp16 = reduce_mean(axes = var_489, keep_dims = var_481, x = zero_mean_sq_cast_fp16)[name = tensor<string, []>("op_490_cast_fp16")];
-            tensor<fp16, []> var_491_to_fp16 = const()[name = tensor<string, []>("op_491_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 1, 1, 1]> var_492_cast_fp16 = add(x = var_490_cast_fp16, y = var_491_to_fp16)[name = tensor<string, []>("op_492_cast_fp16")];
             tensor<fp16, []> denom_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 1]> denom_cast_fp16 = rsqrt(epsilon = denom_epsilon_0_to_fp16, x = var_492_cast_fp16)[name = tensor<string, []>("denom_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> out_cast_fp16 = mul(x = zero_mean_cast_fp16, y = denom_cast_fp16)[name = tensor<string, []>("out_cast_fp16")];
             tensor<fp16, [1280]> hidden_states_gamma_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238876992)))];
             tensor<fp16, [1280]> hidden_states_beta_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238879616)))];
             tensor<fp16, []> hidden_states_epsilon_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
             tensor<fp16, [1, 1280, 1, 1]> hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor<string, []>("hidden_states_cast_fp16")];
-            tensor<int32, [1]> var_502_axes_0 = const()[name = tensor<string, []>("op_502_axes_0"), val = tensor<int32, [1]>([2])];
-            tensor<fp16, [1, 1280, 1]> var_502_cast_fp16 = squeeze(axes = var_502_axes_0, x = hidden_states_cast_fp16)[name = tensor<string, []>("op_502_cast_fp16")];
-            tensor<int32, [3]> var_505_perm_0 = const()[name = tensor<string, []>("op_505_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
             tensor<fp16, [51866]> linear_0_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_0_bias_0_to_fp16"), val = tensor<fp16, [51866]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238882240)))];
-            tensor<fp16, [1, 1, 1280]> transpose_0 = transpose(perm = var_505_perm_0, x = var_502_cast_fp16)[name = tensor<string, []>("transpose_0")];
             tensor<fp16, [1, 1, 51866]> logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = transpose_0)[name = tensor<string, []>("linear_0_cast_fp16")];
-            tensor<int32, []> var_509 = const()[name = tensor<string, []>("op_509"), val = tensor<int32, []>(1)];
-            tensor<bool, []> obj_27_interleave_0 = const()[name = tensor<string, []>("obj_27_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 2560, 1, 1]> key_cache_updates = concat(axis = var_509, interleave = obj_27_interleave_0, values = (current_key_1_cast_fp16, current_key_cast_fp16))[name = tensor<string, []>("obj_27_cast_fp16")];
-            tensor<int32, []> var_512 = const()[name = tensor<string, []>("op_512"), val = tensor<int32, []>(1)];
-            tensor<bool, []> obj_interleave_0 = const()[name = tensor<string, []>("obj_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 2560, 1, 1]> value_cache_updates = concat(axis = var_512, interleave = obj_interleave_0, values = (current_value_1_cast_fp16, current_value_cast_fp16))[name = tensor<string, []>("obj_cast_fp16")];
-        } -> (logits, key_cache_updates, value_cache_updates);
 }

             tensor<bool, []> mh_w_5_transpose_x_0 = const()[name = tensor<string, []>("mh_w_5_transpose_x_0"), val = tensor<bool, []>(true)];
             tensor<bool, []> mh_w_5_transpose_y_0 = const()[name = tensor<string, []>("mh_w_5_transpose_y_0"), val = tensor<bool, []>(false)];
             tensor<fp16, [1, 20, 1, 1500]> mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_207_cast_fp16, y = var_209_cast_fp16)[name = tensor<string, []>("mh_w_5_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 1500]> obj_13_cast_fp16 = softmax(axis = var_56, x = mh_w_5_cast_fp16)[name = tensor<string, []>("obj_13_cast_fp16")];
             tensor<int32, [4]> var_213 = const()[name = tensor<string, []>("op_213"), val = tensor<int32, [4]>([1, 20, 64, -1])];
             tensor<fp16, [1, 20, 64, 1500]> var_214_cast_fp16 = reshape(shape = var_213, x = value_3_cast_fp16)[name = tensor<string, []>("op_214_cast_fp16")];
             tensor<bool, []> attn_3_transpose_x_0 = const()[name = tensor<string, []>("attn_3_transpose_x_0"), val = tensor<bool, []>(false)];
             tensor<bool, []> attn_3_transpose_y_0 = const()[name = tensor<string, []>("attn_3_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 20, 64, 1]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_214_cast_fp16, y = obj_13_cast_fp16)[name = tensor<string, []>("attn_3_cast_fp16")];
             tensor<int32, [4]> var_217 = const()[name = tensor<string, []>("op_217"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
             tensor<fp16, [1, 1280, 1, 1]> input_3_cast_fp16 = reshape(shape = var_217, x = attn_3_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
             tensor<int32, [2]> var_221 = const()[name = tensor<string, []>("op_221"), val = tensor<int32, [2]>([1, 1])];
             tensor<fp16, []> denom_7_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_7_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
             tensor<fp16, [1, 1, 1, 1]> denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0_to_fp16, x = var_297_cast_fp16)[name = tensor<string, []>("denom_7_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> out_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = denom_7_cast_fp16)[name = tensor<string, []>("out_7_cast_fp16")];
+            tensor<fp16, [1280]> obj_15_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_15_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186403136)))];
+            tensor<fp16, [1280]> obj_15_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_15_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186405760)))];
+            tensor<fp16, []> obj_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> obj_15_cast_fp16 = batch_norm(beta = obj_15_beta_0_to_fp16, epsilon = obj_15_epsilon_0_to_fp16, gamma = obj_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor<string, []>("obj_15_cast_fp16")];
             tensor<int32, [2]> var_312 = const()[name = tensor<string, []>("op_312"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_314 = const()[name = tensor<string, []>("op_314"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> query_5_pad_type_0 = const()[name = tensor<string, []>("query_5_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> query_5_pad_0 = const()[name = tensor<string, []>("query_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(186408384)))];
             tensor<fp16, [1280]> layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(189685248)))];
+            tensor<fp16, [1, 1280, 1, 1]> query_5_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = var_314, groups = var_277, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = var_312, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor<string, []>("query_5_cast_fp16")];
             tensor<int32, [2]> var_318 = const()[name = tensor<string, []>("op_318"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_320 = const()[name = tensor<string, []>("op_320"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> current_key_pad_type_0 = const()[name = tensor<string, []>("current_key_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> current_key_pad_0 = const()[name = tensor<string, []>("current_key_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(189687872)))];
+            tensor<fp16, [1, 1280, 1, 1]> current_key_cast_fp16 = conv(dilations = var_320, groups = var_277, pad = current_key_pad_0, pad_type = current_key_pad_type_0, strides = var_318, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor<string, []>("current_key_cast_fp16")];
             tensor<int32, [2]> var_325 = const()[name = tensor<string, []>("op_325"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_327 = const()[name = tensor<string, []>("op_327"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> current_value_pad_type_0 = const()[name = tensor<string, []>("current_value_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> current_value_pad_0 = const()[name = tensor<string, []>("current_value_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(192964736)))];
             tensor<fp16, [1280]> layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(196241600)))];
+            tensor<fp16, [1, 1280, 1, 1]> current_value_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = var_327, groups = var_277, pad = current_value_pad_0, pad_type = current_value_pad_type_0, strides = var_325, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_15_cast_fp16)[name = tensor<string, []>("current_value_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 224]> var_334_cast_fp16 = mul(x = current_key_cast_fp16, y = var_118_cast_fp16)[name = tensor<string, []>("op_334_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 224]> var_336_cast_fp16 = mul(x = var_43_cast_fp16_1, y = var_121_cast_fp16)[name = tensor<string, []>("op_336_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 224]> key_5_cast_fp16 = add(x = var_334_cast_fp16, y = var_336_cast_fp16)[name = tensor<string, []>("key_5_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> input_11_cast_fp16 = reshape(shape = var_361, x = attn_5_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
             tensor<int32, [2]> var_365 = const()[name = tensor<string, []>("op_365"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_367 = const()[name = tensor<string, []>("op_367"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_21_pad_type_0 = const()[name = tensor<string, []>("obj_21_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_21_pad_0 = const()[name = tensor<string, []>("obj_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(196244224)))];
             tensor<fp16, [1280]> layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199521088)))];
+            tensor<fp16, [1, 1280, 1, 1]> obj_21_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = var_367, groups = var_277, pad = obj_21_pad_0, pad_type = obj_21_pad_type_0, strides = var_365, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor<string, []>("obj_21_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_21_cast_fp16)[name = tensor<string, []>("inputs_9_cast_fp16")];
             tensor<int32, [1]> var_377 = const()[name = tensor<string, []>("op_377"), val = tensor<int32, [1]>([1])];
             tensor<fp16, [1, 1, 1, 1]> channels_mean_9_cast_fp16 = reduce_mean(axes = var_377, keep_dims = var_278, x = inputs_9_cast_fp16)[name = tensor<string, []>("channels_mean_9_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_9_cast_fp16 = sub(x = inputs_9_cast_fp16, y = channels_mean_9_cast_fp16)[name = tensor<string, []>("zero_mean_9_cast_fp16")];
             tensor<fp16, []> denom_9_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_9_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
             tensor<fp16, [1, 1, 1, 1]> denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0_to_fp16, x = var_384_cast_fp16)[name = tensor<string, []>("denom_9_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> out_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = denom_9_cast_fp16)[name = tensor<string, []>("out_9_cast_fp16")];
+            tensor<fp16, [1280]> obj_23_gamma_0_to_fp16 = const()[name = tensor<string, []>("obj_23_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199523712)))];
+            tensor<fp16, [1280]> obj_23_beta_0_to_fp16 = const()[name = tensor<string, []>("obj_23_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199526336)))];
+            tensor<fp16, []> obj_23_epsilon_0_to_fp16 = const()[name = tensor<string, []>("obj_23_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> obj_23_cast_fp16 = batch_norm(beta = obj_23_beta_0_to_fp16, epsilon = obj_23_epsilon_0_to_fp16, gamma = obj_23_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor<string, []>("obj_23_cast_fp16")];
             tensor<int32, [2]> var_399 = const()[name = tensor<string, []>("op_399"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_401 = const()[name = tensor<string, []>("op_401"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> query_pad_type_0 = const()[name = tensor<string, []>("query_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> query_pad_0 = const()[name = tensor<string, []>("query_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_encoder_attn_q_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_q_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(199528960)))];
             tensor<fp16, [1280]> layers_1_encoder_attn_q_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_q_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(202805824)))];
+            tensor<fp16, [1, 1280, 1, 1]> query_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_bias_to_fp16, dilations = var_401, groups = var_277, pad = query_pad_0, pad_type = query_pad_type_0, strides = var_399, weight = layers_1_encoder_attn_q_proj_weight_to_fp16, x = obj_23_cast_fp16)[name = tensor<string, []>("query_cast_fp16")];
             tensor<int32, [2]> var_405 = const()[name = tensor<string, []>("op_405"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_407 = const()[name = tensor<string, []>("op_407"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> key_pad_type_0 = const()[name = tensor<string, []>("key_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<bool, []> mh_w_transpose_x_0 = const()[name = tensor<string, []>("mh_w_transpose_x_0"), val = tensor<bool, []>(true)];
             tensor<bool, []> mh_w_transpose_y_0 = const()[name = tensor<string, []>("mh_w_transpose_y_0"), val = tensor<bool, []>(false)];
             tensor<fp16, [1, 20, 1, 1500]> mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_421_cast_fp16, y = var_423_cast_fp16)[name = tensor<string, []>("mh_w_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 1500]> obj_27_cast_fp16 = softmax(axis = var_270, x = mh_w_cast_fp16)[name = tensor<string, []>("obj_27_cast_fp16")];
             tensor<int32, [4]> var_427 = const()[name = tensor<string, []>("op_427"), val = tensor<int32, [4]>([1, 20, 64, -1])];
             tensor<fp16, [1, 20, 64, 1500]> var_428_cast_fp16 = reshape(shape = var_427, x = value_cast_fp16)[name = tensor<string, []>("op_428_cast_fp16")];
             tensor<bool, []> attn_transpose_x_0 = const()[name = tensor<string, []>("attn_transpose_x_0"), val = tensor<bool, []>(false)];
             tensor<bool, []> attn_transpose_y_0 = const()[name = tensor<string, []>("attn_transpose_y_0"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 20, 64, 1]> attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_428_cast_fp16, y = obj_27_cast_fp16)[name = tensor<string, []>("attn_cast_fp16")];
             tensor<int32, [4]> var_431 = const()[name = tensor<string, []>("op_431"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
             tensor<fp16, [1, 1280, 1, 1]> input_13_cast_fp16 = reshape(shape = var_431, x = attn_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
             tensor<int32, [2]> var_435 = const()[name = tensor<string, []>("op_435"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_437 = const()[name = tensor<string, []>("op_437"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> obj_25_pad_type_0 = const()[name = tensor<string, []>("obj_25_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> obj_25_pad_0 = const()[name = tensor<string, []>("obj_25_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 1280, 1, 1]> layers_1_encoder_attn_o_proj_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_o_proj_weight_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(209364800)))];
             tensor<fp16, [1280]> layers_1_encoder_attn_o_proj_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_encoder_attn_o_proj_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212641664)))];
+            tensor<fp16, [1, 1280, 1, 1]> obj_25_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_bias_to_fp16, dilations = var_437, groups = var_277, pad = obj_25_pad_0, pad_type = obj_25_pad_type_0, strides = var_435, weight = layers_1_encoder_attn_o_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor<string, []>("obj_25_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_25_cast_fp16)[name = tensor<string, []>("inputs_11_cast_fp16")];
+            tensor<int32, [1]> var_446 = const()[name = tensor<string, []>("op_446"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_11_cast_fp16 = reduce_mean(axes = var_446, keep_dims = var_278, x = inputs_11_cast_fp16)[name = tensor<string, []>("channels_mean_11_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_11_cast_fp16 = sub(x = inputs_11_cast_fp16, y = channels_mean_11_cast_fp16)[name = tensor<string, []>("zero_mean_11_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_sq_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = zero_mean_11_cast_fp16)[name = tensor<string, []>("zero_mean_sq_11_cast_fp16")];
+            tensor<int32, [1]> var_450 = const()[name = tensor<string, []>("op_450"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_451_cast_fp16 = reduce_mean(axes = var_450, keep_dims = var_278, x = zero_mean_sq_11_cast_fp16)[name = tensor<string, []>("op_451_cast_fp16")];
+            tensor<fp16, []> var_452_to_fp16 = const()[name = tensor<string, []>("op_452_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_453_cast_fp16 = add(x = var_451_cast_fp16, y = var_452_to_fp16)[name = tensor<string, []>("op_453_cast_fp16")];
             tensor<fp16, []> denom_11_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_11_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0_to_fp16, x = var_453_cast_fp16)[name = tensor<string, []>("denom_11_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> out_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = denom_11_cast_fp16)[name = tensor<string, []>("out_11_cast_fp16")];
             tensor<fp16, [1280]> input_15_gamma_0_to_fp16 = const()[name = tensor<string, []>("input_15_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212644288)))];
             tensor<fp16, [1280]> input_15_beta_0_to_fp16 = const()[name = tensor<string, []>("input_15_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212646912)))];
             tensor<fp16, []> input_15_epsilon_0_to_fp16 = const()[name = tensor<string, []>("input_15_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
             tensor<fp16, [1, 1280, 1, 1]> input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<int32, [2]> var_464 = const()[name = tensor<string, []>("op_464"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_466 = const()[name = tensor<string, []>("op_466"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> input_17_pad_type_0 = const()[name = tensor<string, []>("input_17_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> input_17_pad_0 = const()[name = tensor<string, []>("input_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [5120, 1280, 1, 1]> layers_1_fc1_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_weight_to_fp16"), val = tensor<fp16, [5120, 1280, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(212649536)))];
             tensor<fp16, [5120]> layers_1_fc1_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc1_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(225756800)))];
+            tensor<fp16, [1, 5120, 1, 1]> input_17_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = var_466, groups = var_277, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_464, weight = layers_1_fc1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
             tensor<string, []> input_mode_0 = const()[name = tensor<string, []>("input_mode_0"), val = tensor<string, []>("EXACT")];
             tensor<fp16, [1, 5120, 1, 1]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_17_cast_fp16)[name = tensor<string, []>("input_cast_fp16")];
+            tensor<int32, [2]> var_472 = const()[name = tensor<string, []>("op_472"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_474 = const()[name = tensor<string, []>("op_474"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> hidden_states_5_pad_type_0 = const()[name = tensor<string, []>("hidden_states_5_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> hidden_states_5_pad_0 = const()[name = tensor<string, []>("hidden_states_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
             tensor<fp16, [1280, 5120, 1, 1]> layers_1_fc2_weight_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_weight_to_fp16"), val = tensor<fp16, [1280, 5120, 1, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(225767104)))];
             tensor<fp16, [1280]> layers_1_fc2_bias_to_fp16 = const()[name = tensor<string, []>("layers_1_fc2_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238874368)))];
+            tensor<fp16, [1, 1280, 1, 1]> hidden_states_5_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = var_474, groups = var_277, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = var_472, weight = layers_1_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor<string, []>("hidden_states_5_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> inputs_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor<string, []>("inputs_cast_fp16")];
+            tensor<bool, []> var_485 = const()[name = tensor<string, []>("op_485"), val = tensor<bool, []>(true)];
+            tensor<int32, [1]> var_489 = const()[name = tensor<string, []>("op_489"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> channels_mean_cast_fp16 = reduce_mean(axes = var_489, keep_dims = var_485, x = inputs_cast_fp16)[name = tensor<string, []>("channels_mean_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_cast_fp16 = sub(x = inputs_cast_fp16, y = channels_mean_cast_fp16)[name = tensor<string, []>("zero_mean_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> zero_mean_sq_cast_fp16 = mul(x = zero_mean_cast_fp16, y = zero_mean_cast_fp16)[name = tensor<string, []>("zero_mean_sq_cast_fp16")];
+            tensor<int32, [1]> var_493 = const()[name = tensor<string, []>("op_493"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 1]> var_494_cast_fp16 = reduce_mean(axes = var_493, keep_dims = var_485, x = zero_mean_sq_cast_fp16)[name = tensor<string, []>("op_494_cast_fp16")];
+            tensor<fp16, []> var_495_to_fp16 = const()[name = tensor<string, []>("op_495_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 1, 1, 1]> var_496_cast_fp16 = add(x = var_494_cast_fp16, y = var_495_to_fp16)[name = tensor<string, []>("op_496_cast_fp16")];
             tensor<fp16, []> denom_epsilon_0_to_fp16 = const()[name = tensor<string, []>("denom_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 1]> denom_cast_fp16 = rsqrt(epsilon = denom_epsilon_0_to_fp16, x = var_496_cast_fp16)[name = tensor<string, []>("denom_cast_fp16")];
             tensor<fp16, [1, 1280, 1, 1]> out_cast_fp16 = mul(x = zero_mean_cast_fp16, y = denom_cast_fp16)[name = tensor<string, []>("out_cast_fp16")];
             tensor<fp16, [1280]> hidden_states_gamma_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238876992)))];
             tensor<fp16, [1280]> hidden_states_beta_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238879616)))];
             tensor<fp16, []> hidden_states_epsilon_0_to_fp16 = const()[name = tensor<string, []>("hidden_states_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
             tensor<fp16, [1, 1280, 1, 1]> hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor<string, []>("hidden_states_cast_fp16")];
+            tensor<int32, [1]> var_506_axes_0 = const()[name = tensor<string, []>("op_506_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1280, 1]> var_506_cast_fp16 = squeeze(axes = var_506_axes_0, x = hidden_states_cast_fp16)[name = tensor<string, []>("op_506_cast_fp16")];
+            tensor<int32, [3]> var_509_perm_0 = const()[name = tensor<string, []>("op_509_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
             tensor<fp16, [51866]> linear_0_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_0_bias_0_to_fp16"), val = tensor<fp16, [51866]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(238882240)))];
+            tensor<fp16, [1, 1, 1280]> transpose_0 = transpose(perm = var_509_perm_0, x = var_506_cast_fp16)[name = tensor<string, []>("transpose_0")];
             tensor<fp16, [1, 1, 51866]> logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = transpose_0)[name = tensor<string, []>("linear_0_cast_fp16")];
+            tensor<int32, []> var_513 = const()[name = tensor<string, []>("op_513"), val = tensor<int32, []>(1)];
+            tensor<bool, []> obj_31_interleave_0 = const()[name = tensor<string, []>("obj_31_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 2560, 1, 1]> key_cache_updates = concat(axis = var_513, interleave = obj_31_interleave_0, values = (current_key_1_cast_fp16, current_key_cast_fp16))[name = tensor<string, []>("obj_31_cast_fp16")];
+            tensor<int32, []> var_516 = const()[name = tensor<string, []>("op_516"), val = tensor<int32, []>(1)];
+            tensor<bool, []> obj_33_interleave_0 = const()[name = tensor<string, []>("obj_33_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 2560, 1, 1]> value_cache_updates = concat(axis = var_516, interleave = obj_33_interleave_0, values = (current_value_1_cast_fp16, current_value_cast_fp16))[name = tensor<string, []>("obj_33_cast_fp16")];
+            tensor<int32, [4]> var_527_begin_0 = const()[name = tensor<string, []>("op_527_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_527_end_0 = const()[name = tensor<string, []>("op_527_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_527_end_mask_0 = const()[name = tensor<string, []>("op_527_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_527_cast_fp16 = slice_by_index(begin = var_527_begin_0, end = var_527_end_0, end_mask = var_527_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_527_cast_fp16")];
+            tensor<int32, [4]> var_530_begin_0 = const()[name = tensor<string, []>("op_530_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_530_end_0 = const()[name = tensor<string, []>("op_530_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_530_end_mask_0 = const()[name = tensor<string, []>("op_530_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_530_squeeze_mask_0 = const()[name = tensor<string, []>("op_530_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_530_cast_fp16 = slice_by_index(begin = var_530_begin_0, end = var_530_end_0, end_mask = var_530_end_mask_0, squeeze_mask = var_530_squeeze_mask_0, x = var_527_cast_fp16)[name = tensor<string, []>("op_530_cast_fp16")];
+            tensor<int32, [4]> var_545_begin_0 = const()[name = tensor<string, []>("op_545_begin_0"), val = tensor<int32, [4]>([0, 1, 0, 0])];
+            tensor<int32, [4]> var_545_end_0 = const()[name = tensor<string, []>("op_545_end_0"), val = tensor<int32, [4]>([1, 2, 1, 1500])];
+            tensor<bool, [4]> var_545_end_mask_0 = const()[name = tensor<string, []>("op_545_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_545_cast_fp16 = slice_by_index(begin = var_545_begin_0, end = var_545_end_0, end_mask = var_545_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_545_cast_fp16")];
+            tensor<int32, [4]> var_548_begin_0 = const()[name = tensor<string, []>("op_548_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_548_end_0 = const()[name = tensor<string, []>("op_548_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_548_end_mask_0 = const()[name = tensor<string, []>("op_548_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_548_squeeze_mask_0 = const()[name = tensor<string, []>("op_548_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_548_cast_fp16 = slice_by_index(begin = var_548_begin_0, end = var_548_end_0, end_mask = var_548_end_mask_0, squeeze_mask = var_548_squeeze_mask_0, x = var_545_cast_fp16)[name = tensor<string, []>("op_548_cast_fp16")];
+            tensor<int32, [4]> var_563_begin_0 = const()[name = tensor<string, []>("op_563_begin_0"), val = tensor<int32, [4]>([0, 2, 0, 0])];
+            tensor<int32, [4]> var_563_end_0 = const()[name = tensor<string, []>("op_563_end_0"), val = tensor<int32, [4]>([1, 3, 1, 1500])];
+            tensor<bool, [4]> var_563_end_mask_0 = const()[name = tensor<string, []>("op_563_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_563_cast_fp16 = slice_by_index(begin = var_563_begin_0, end = var_563_end_0, end_mask = var_563_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_563_cast_fp16")];
+            tensor<int32, [4]> var_566_begin_0 = const()[name = tensor<string, []>("op_566_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_566_end_0 = const()[name = tensor<string, []>("op_566_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_566_end_mask_0 = const()[name = tensor<string, []>("op_566_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_566_squeeze_mask_0 = const()[name = tensor<string, []>("op_566_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_566_cast_fp16 = slice_by_index(begin = var_566_begin_0, end = var_566_end_0, end_mask = var_566_end_mask_0, squeeze_mask = var_566_squeeze_mask_0, x = var_563_cast_fp16)[name = tensor<string, []>("op_566_cast_fp16")];
+            tensor<int32, [4]> var_581_begin_0 = const()[name = tensor<string, []>("op_581_begin_0"), val = tensor<int32, [4]>([0, 3, 0, 0])];
+            tensor<int32, [4]> var_581_end_0 = const()[name = tensor<string, []>("op_581_end_0"), val = tensor<int32, [4]>([1, 4, 1, 1500])];
+            tensor<bool, [4]> var_581_end_mask_0 = const()[name = tensor<string, []>("op_581_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_581_cast_fp16 = slice_by_index(begin = var_581_begin_0, end = var_581_end_0, end_mask = var_581_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_581_cast_fp16")];
+            tensor<int32, [4]> var_584_begin_0 = const()[name = tensor<string, []>("op_584_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_584_end_0 = const()[name = tensor<string, []>("op_584_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_584_end_mask_0 = const()[name = tensor<string, []>("op_584_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_584_squeeze_mask_0 = const()[name = tensor<string, []>("op_584_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_584_cast_fp16 = slice_by_index(begin = var_584_begin_0, end = var_584_end_0, end_mask = var_584_end_mask_0, squeeze_mask = var_584_squeeze_mask_0, x = var_581_cast_fp16)[name = tensor<string, []>("op_584_cast_fp16")];
+            tensor<int32, [4]> var_599_begin_0 = const()[name = tensor<string, []>("op_599_begin_0"), val = tensor<int32, [4]>([0, 4, 0, 0])];
+            tensor<int32, [4]> var_599_end_0 = const()[name = tensor<string, []>("op_599_end_0"), val = tensor<int32, [4]>([1, 5, 1, 1500])];
+            tensor<bool, [4]> var_599_end_mask_0 = const()[name = tensor<string, []>("op_599_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_599_cast_fp16 = slice_by_index(begin = var_599_begin_0, end = var_599_end_0, end_mask = var_599_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_599_cast_fp16")];
+            tensor<int32, [4]> var_602_begin_0 = const()[name = tensor<string, []>("op_602_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_602_end_0 = const()[name = tensor<string, []>("op_602_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_602_end_mask_0 = const()[name = tensor<string, []>("op_602_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_602_squeeze_mask_0 = const()[name = tensor<string, []>("op_602_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_602_cast_fp16 = slice_by_index(begin = var_602_begin_0, end = var_602_end_0, end_mask = var_602_end_mask_0, squeeze_mask = var_602_squeeze_mask_0, x = var_599_cast_fp16)[name = tensor<string, []>("op_602_cast_fp16")];
+            tensor<int32, [4]> var_617_begin_0 = const()[name = tensor<string, []>("op_617_begin_0"), val = tensor<int32, [4]>([0, 5, 0, 0])];
+            tensor<int32, [4]> var_617_end_0 = const()[name = tensor<string, []>("op_617_end_0"), val = tensor<int32, [4]>([1, 6, 1, 1500])];
+            tensor<bool, [4]> var_617_end_mask_0 = const()[name = tensor<string, []>("op_617_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_617_cast_fp16 = slice_by_index(begin = var_617_begin_0, end = var_617_end_0, end_mask = var_617_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_617_cast_fp16")];
+            tensor<int32, [4]> var_620_begin_0 = const()[name = tensor<string, []>("op_620_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_620_end_0 = const()[name = tensor<string, []>("op_620_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_620_end_mask_0 = const()[name = tensor<string, []>("op_620_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_620_squeeze_mask_0 = const()[name = tensor<string, []>("op_620_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_620_cast_fp16 = slice_by_index(begin = var_620_begin_0, end = var_620_end_0, end_mask = var_620_end_mask_0, squeeze_mask = var_620_squeeze_mask_0, x = var_617_cast_fp16)[name = tensor<string, []>("op_620_cast_fp16")];
+            tensor<int32, [4]> var_635_begin_0 = const()[name = tensor<string, []>("op_635_begin_0"), val = tensor<int32, [4]>([0, 6, 0, 0])];
+            tensor<int32, [4]> var_635_end_0 = const()[name = tensor<string, []>("op_635_end_0"), val = tensor<int32, [4]>([1, 7, 1, 1500])];
+            tensor<bool, [4]> var_635_end_mask_0 = const()[name = tensor<string, []>("op_635_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_635_cast_fp16 = slice_by_index(begin = var_635_begin_0, end = var_635_end_0, end_mask = var_635_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_635_cast_fp16")];
+            tensor<int32, [4]> var_638_begin_0 = const()[name = tensor<string, []>("op_638_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_638_end_0 = const()[name = tensor<string, []>("op_638_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_638_end_mask_0 = const()[name = tensor<string, []>("op_638_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_638_squeeze_mask_0 = const()[name = tensor<string, []>("op_638_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_638_cast_fp16 = slice_by_index(begin = var_638_begin_0, end = var_638_end_0, end_mask = var_638_end_mask_0, squeeze_mask = var_638_squeeze_mask_0, x = var_635_cast_fp16)[name = tensor<string, []>("op_638_cast_fp16")];
+            tensor<int32, [4]> var_653_begin_0 = const()[name = tensor<string, []>("op_653_begin_0"), val = tensor<int32, [4]>([0, 7, 0, 0])];
+            tensor<int32, [4]> var_653_end_0 = const()[name = tensor<string, []>("op_653_end_0"), val = tensor<int32, [4]>([1, 8, 1, 1500])];
+            tensor<bool, [4]> var_653_end_mask_0 = const()[name = tensor<string, []>("op_653_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_653_cast_fp16 = slice_by_index(begin = var_653_begin_0, end = var_653_end_0, end_mask = var_653_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_653_cast_fp16")];
+            tensor<int32, [4]> var_656_begin_0 = const()[name = tensor<string, []>("op_656_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_656_end_0 = const()[name = tensor<string, []>("op_656_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_656_end_mask_0 = const()[name = tensor<string, []>("op_656_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_656_squeeze_mask_0 = const()[name = tensor<string, []>("op_656_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_656_cast_fp16 = slice_by_index(begin = var_656_begin_0, end = var_656_end_0, end_mask = var_656_end_mask_0, squeeze_mask = var_656_squeeze_mask_0, x = var_653_cast_fp16)[name = tensor<string, []>("op_656_cast_fp16")];
+            tensor<int32, [4]> var_671_begin_0 = const()[name = tensor<string, []>("op_671_begin_0"), val = tensor<int32, [4]>([0, 8, 0, 0])];
+            tensor<int32, [4]> var_671_end_0 = const()[name = tensor<string, []>("op_671_end_0"), val = tensor<int32, [4]>([1, 9, 1, 1500])];
+            tensor<bool, [4]> var_671_end_mask_0 = const()[name = tensor<string, []>("op_671_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_671_cast_fp16 = slice_by_index(begin = var_671_begin_0, end = var_671_end_0, end_mask = var_671_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_671_cast_fp16")];
+            tensor<int32, [4]> var_674_begin_0 = const()[name = tensor<string, []>("op_674_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_674_end_0 = const()[name = tensor<string, []>("op_674_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_674_end_mask_0 = const()[name = tensor<string, []>("op_674_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_674_squeeze_mask_0 = const()[name = tensor<string, []>("op_674_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_674_cast_fp16 = slice_by_index(begin = var_674_begin_0, end = var_674_end_0, end_mask = var_674_end_mask_0, squeeze_mask = var_674_squeeze_mask_0, x = var_671_cast_fp16)[name = tensor<string, []>("op_674_cast_fp16")];
+            tensor<int32, [4]> var_689_begin_0 = const()[name = tensor<string, []>("op_689_begin_0"), val = tensor<int32, [4]>([0, 9, 0, 0])];
+            tensor<int32, [4]> var_689_end_0 = const()[name = tensor<string, []>("op_689_end_0"), val = tensor<int32, [4]>([1, 10, 1, 1500])];
+            tensor<bool, [4]> var_689_end_mask_0 = const()[name = tensor<string, []>("op_689_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_689_cast_fp16 = slice_by_index(begin = var_689_begin_0, end = var_689_end_0, end_mask = var_689_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_689_cast_fp16")];
+            tensor<int32, [4]> var_692_begin_0 = const()[name = tensor<string, []>("op_692_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_692_end_0 = const()[name = tensor<string, []>("op_692_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_692_end_mask_0 = const()[name = tensor<string, []>("op_692_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_692_squeeze_mask_0 = const()[name = tensor<string, []>("op_692_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_692_cast_fp16 = slice_by_index(begin = var_692_begin_0, end = var_692_end_0, end_mask = var_692_end_mask_0, squeeze_mask = var_692_squeeze_mask_0, x = var_689_cast_fp16)[name = tensor<string, []>("op_692_cast_fp16")];
+            tensor<int32, [4]> var_707_begin_0 = const()[name = tensor<string, []>("op_707_begin_0"), val = tensor<int32, [4]>([0, 10, 0, 0])];
+            tensor<int32, [4]> var_707_end_0 = const()[name = tensor<string, []>("op_707_end_0"), val = tensor<int32, [4]>([1, 11, 1, 1500])];
+            tensor<bool, [4]> var_707_end_mask_0 = const()[name = tensor<string, []>("op_707_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_707_cast_fp16 = slice_by_index(begin = var_707_begin_0, end = var_707_end_0, end_mask = var_707_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_707_cast_fp16")];
+            tensor<int32, [4]> var_710_begin_0 = const()[name = tensor<string, []>("op_710_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_710_end_0 = const()[name = tensor<string, []>("op_710_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_710_end_mask_0 = const()[name = tensor<string, []>("op_710_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_710_squeeze_mask_0 = const()[name = tensor<string, []>("op_710_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_710_cast_fp16 = slice_by_index(begin = var_710_begin_0, end = var_710_end_0, end_mask = var_710_end_mask_0, squeeze_mask = var_710_squeeze_mask_0, x = var_707_cast_fp16)[name = tensor<string, []>("op_710_cast_fp16")];
+            tensor<int32, [4]> var_725_begin_0 = const()[name = tensor<string, []>("op_725_begin_0"), val = tensor<int32, [4]>([0, 11, 0, 0])];
+            tensor<int32, [4]> var_725_end_0 = const()[name = tensor<string, []>("op_725_end_0"), val = tensor<int32, [4]>([1, 12, 1, 1500])];
+            tensor<bool, [4]> var_725_end_mask_0 = const()[name = tensor<string, []>("op_725_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_725_cast_fp16 = slice_by_index(begin = var_725_begin_0, end = var_725_end_0, end_mask = var_725_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_725_cast_fp16")];
+            tensor<int32, [4]> var_728_begin_0 = const()[name = tensor<string, []>("op_728_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_728_end_0 = const()[name = tensor<string, []>("op_728_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_728_end_mask_0 = const()[name = tensor<string, []>("op_728_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_728_squeeze_mask_0 = const()[name = tensor<string, []>("op_728_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_728_cast_fp16 = slice_by_index(begin = var_728_begin_0, end = var_728_end_0, end_mask = var_728_end_mask_0, squeeze_mask = var_728_squeeze_mask_0, x = var_725_cast_fp16)[name = tensor<string, []>("op_728_cast_fp16")];
+            tensor<int32, [4]> var_743_begin_0 = const()[name = tensor<string, []>("op_743_begin_0"), val = tensor<int32, [4]>([0, 12, 0, 0])];
+            tensor<int32, [4]> var_743_end_0 = const()[name = tensor<string, []>("op_743_end_0"), val = tensor<int32, [4]>([1, 13, 1, 1500])];
+            tensor<bool, [4]> var_743_end_mask_0 = const()[name = tensor<string, []>("op_743_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_743_cast_fp16 = slice_by_index(begin = var_743_begin_0, end = var_743_end_0, end_mask = var_743_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_743_cast_fp16")];
+            tensor<int32, [4]> var_746_begin_0 = const()[name = tensor<string, []>("op_746_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_746_end_0 = const()[name = tensor<string, []>("op_746_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_746_end_mask_0 = const()[name = tensor<string, []>("op_746_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_746_squeeze_mask_0 = const()[name = tensor<string, []>("op_746_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_746_cast_fp16 = slice_by_index(begin = var_746_begin_0, end = var_746_end_0, end_mask = var_746_end_mask_0, squeeze_mask = var_746_squeeze_mask_0, x = var_743_cast_fp16)[name = tensor<string, []>("op_746_cast_fp16")];
+            tensor<int32, [4]> var_761_begin_0 = const()[name = tensor<string, []>("op_761_begin_0"), val = tensor<int32, [4]>([0, 13, 0, 0])];
+            tensor<int32, [4]> var_761_end_0 = const()[name = tensor<string, []>("op_761_end_0"), val = tensor<int32, [4]>([1, 14, 1, 1500])];
+            tensor<bool, [4]> var_761_end_mask_0 = const()[name = tensor<string, []>("op_761_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_761_cast_fp16 = slice_by_index(begin = var_761_begin_0, end = var_761_end_0, end_mask = var_761_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_761_cast_fp16")];
+            tensor<int32, [4]> var_764_begin_0 = const()[name = tensor<string, []>("op_764_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_764_end_0 = const()[name = tensor<string, []>("op_764_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_764_end_mask_0 = const()[name = tensor<string, []>("op_764_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_764_squeeze_mask_0 = const()[name = tensor<string, []>("op_764_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_764_cast_fp16 = slice_by_index(begin = var_764_begin_0, end = var_764_end_0, end_mask = var_764_end_mask_0, squeeze_mask = var_764_squeeze_mask_0, x = var_761_cast_fp16)[name = tensor<string, []>("op_764_cast_fp16")];
+            tensor<int32, [4]> var_779_begin_0 = const()[name = tensor<string, []>("op_779_begin_0"), val = tensor<int32, [4]>([0, 14, 0, 0])];
+            tensor<int32, [4]> var_779_end_0 = const()[name = tensor<string, []>("op_779_end_0"), val = tensor<int32, [4]>([1, 15, 1, 1500])];
+            tensor<bool, [4]> var_779_end_mask_0 = const()[name = tensor<string, []>("op_779_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_779_cast_fp16 = slice_by_index(begin = var_779_begin_0, end = var_779_end_0, end_mask = var_779_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_779_cast_fp16")];
+            tensor<int32, [4]> var_782_begin_0 = const()[name = tensor<string, []>("op_782_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_782_end_0 = const()[name = tensor<string, []>("op_782_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_782_end_mask_0 = const()[name = tensor<string, []>("op_782_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_782_squeeze_mask_0 = const()[name = tensor<string, []>("op_782_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_782_cast_fp16 = slice_by_index(begin = var_782_begin_0, end = var_782_end_0, end_mask = var_782_end_mask_0, squeeze_mask = var_782_squeeze_mask_0, x = var_779_cast_fp16)[name = tensor<string, []>("op_782_cast_fp16")];
+            tensor<int32, [4]> var_797_begin_0 = const()[name = tensor<string, []>("op_797_begin_0"), val = tensor<int32, [4]>([0, 15, 0, 0])];
+            tensor<int32, [4]> var_797_end_0 = const()[name = tensor<string, []>("op_797_end_0"), val = tensor<int32, [4]>([1, 16, 1, 1500])];
+            tensor<bool, [4]> var_797_end_mask_0 = const()[name = tensor<string, []>("op_797_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_797_cast_fp16 = slice_by_index(begin = var_797_begin_0, end = var_797_end_0, end_mask = var_797_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_797_cast_fp16")];
+            tensor<int32, [4]> var_800_begin_0 = const()[name = tensor<string, []>("op_800_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_800_end_0 = const()[name = tensor<string, []>("op_800_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_800_end_mask_0 = const()[name = tensor<string, []>("op_800_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_800_squeeze_mask_0 = const()[name = tensor<string, []>("op_800_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_800_cast_fp16 = slice_by_index(begin = var_800_begin_0, end = var_800_end_0, end_mask = var_800_end_mask_0, squeeze_mask = var_800_squeeze_mask_0, x = var_797_cast_fp16)[name = tensor<string, []>("op_800_cast_fp16")];
+            tensor<int32, [4]> var_815_begin_0 = const()[name = tensor<string, []>("op_815_begin_0"), val = tensor<int32, [4]>([0, 16, 0, 0])];
+            tensor<int32, [4]> var_815_end_0 = const()[name = tensor<string, []>("op_815_end_0"), val = tensor<int32, [4]>([1, 17, 1, 1500])];
+            tensor<bool, [4]> var_815_end_mask_0 = const()[name = tensor<string, []>("op_815_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_815_cast_fp16 = slice_by_index(begin = var_815_begin_0, end = var_815_end_0, end_mask = var_815_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_815_cast_fp16")];
+            tensor<int32, [4]> var_818_begin_0 = const()[name = tensor<string, []>("op_818_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_818_end_0 = const()[name = tensor<string, []>("op_818_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_818_end_mask_0 = const()[name = tensor<string, []>("op_818_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_818_squeeze_mask_0 = const()[name = tensor<string, []>("op_818_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_818_cast_fp16 = slice_by_index(begin = var_818_begin_0, end = var_818_end_0, end_mask = var_818_end_mask_0, squeeze_mask = var_818_squeeze_mask_0, x = var_815_cast_fp16)[name = tensor<string, []>("op_818_cast_fp16")];
+            tensor<int32, [4]> var_833_begin_0 = const()[name = tensor<string, []>("op_833_begin_0"), val = tensor<int32, [4]>([0, 17, 0, 0])];
+            tensor<int32, [4]> var_833_end_0 = const()[name = tensor<string, []>("op_833_end_0"), val = tensor<int32, [4]>([1, 18, 1, 1500])];
+            tensor<bool, [4]> var_833_end_mask_0 = const()[name = tensor<string, []>("op_833_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_833_cast_fp16 = slice_by_index(begin = var_833_begin_0, end = var_833_end_0, end_mask = var_833_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_833_cast_fp16")];
+            tensor<int32, [4]> var_836_begin_0 = const()[name = tensor<string, []>("op_836_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_836_end_0 = const()[name = tensor<string, []>("op_836_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_836_end_mask_0 = const()[name = tensor<string, []>("op_836_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_836_squeeze_mask_0 = const()[name = tensor<string, []>("op_836_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_836_cast_fp16 = slice_by_index(begin = var_836_begin_0, end = var_836_end_0, end_mask = var_836_end_mask_0, squeeze_mask = var_836_squeeze_mask_0, x = var_833_cast_fp16)[name = tensor<string, []>("op_836_cast_fp16")];
+            tensor<int32, [4]> var_851_begin_0 = const()[name = tensor<string, []>("op_851_begin_0"), val = tensor<int32, [4]>([0, 18, 0, 0])];
+            tensor<int32, [4]> var_851_end_0 = const()[name = tensor<string, []>("op_851_end_0"), val = tensor<int32, [4]>([1, 19, 1, 1500])];
+            tensor<bool, [4]> var_851_end_mask_0 = const()[name = tensor<string, []>("op_851_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_851_cast_fp16 = slice_by_index(begin = var_851_begin_0, end = var_851_end_0, end_mask = var_851_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_851_cast_fp16")];
+            tensor<int32, [4]> var_854_begin_0 = const()[name = tensor<string, []>("op_854_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_854_end_0 = const()[name = tensor<string, []>("op_854_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_854_end_mask_0 = const()[name = tensor<string, []>("op_854_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_854_squeeze_mask_0 = const()[name = tensor<string, []>("op_854_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_854_cast_fp16 = slice_by_index(begin = var_854_begin_0, end = var_854_end_0, end_mask = var_854_end_mask_0, squeeze_mask = var_854_squeeze_mask_0, x = var_851_cast_fp16)[name = tensor<string, []>("op_854_cast_fp16")];
+            tensor<int32, [4]> var_869_begin_0 = const()[name = tensor<string, []>("op_869_begin_0"), val = tensor<int32, [4]>([0, 19, 0, 0])];
+            tensor<int32, [4]> var_869_end_0 = const()[name = tensor<string, []>("op_869_end_0"), val = tensor<int32, [4]>([1, 20, 1, 1500])];
+            tensor<bool, [4]> var_869_end_mask_0 = const()[name = tensor<string, []>("op_869_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1500]> var_869_cast_fp16 = slice_by_index(begin = var_869_begin_0, end = var_869_end_0, end_mask = var_869_end_mask_0, x = obj_27_cast_fp16)[name = tensor<string, []>("op_869_cast_fp16")];
+            tensor<int32, [4]> var_872_begin_0 = const()[name = tensor<string, []>("op_872_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_872_end_0 = const()[name = tensor<string, []>("op_872_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1500])];
+            tensor<bool, [4]> var_872_end_mask_0 = const()[name = tensor<string, []>("op_872_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_872_squeeze_mask_0 = const()[name = tensor<string, []>("op_872_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1500]> var_872_cast_fp16 = slice_by_index(begin = var_872_begin_0, end = var_872_end_0, end_mask = var_872_end_mask_0, squeeze_mask = var_872_squeeze_mask_0, x = var_869_cast_fp16)[name = tensor<string, []>("op_872_cast_fp16")];
+            tensor<int32, []> var_879 = const()[name = tensor<string, []>("op_879"), val = tensor<int32, []>(1)];
+            tensor<bool, []> var_880_interleave_0 = const()[name = tensor<string, []>("op_880_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 20, 1500]> var_880_cast_fp16 = concat(axis = var_879, interleave = var_880_interleave_0, values = (var_530_cast_fp16, var_548_cast_fp16, var_566_cast_fp16, var_584_cast_fp16, var_602_cast_fp16, var_620_cast_fp16, var_638_cast_fp16, var_656_cast_fp16, var_674_cast_fp16, var_692_cast_fp16, var_710_cast_fp16, var_728_cast_fp16, var_746_cast_fp16, var_764_cast_fp16, var_782_cast_fp16, var_800_cast_fp16, var_818_cast_fp16, var_836_cast_fp16, var_854_cast_fp16, var_872_cast_fp16))[name = tensor<string, []>("op_880_cast_fp16")];
+            tensor<int32, [1]> var_882 = const()[name = tensor<string, []>("op_882"), val = tensor<int32, [1]>([1])];
+            tensor<bool, []> var_883 = const()[name = tensor<string, []>("op_883"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 1500]> alignment_heads_weights = reduce_mean(axes = var_882, keep_dims = var_883, x = var_880_cast_fp16)[name = tensor<string, []>("obj_cast_fp16")];
+        } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights);
 }

distil-whisper_distil-large-v3_turbo_600MB/TextDecoder.mlmodelc/weights/weight.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7415eed816b4f2f69ce64ece61328f9f60f96e2201a45cb01caca6d413cc6e94
 size 238986036

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e8e3906c4f1717fe848f6d45e6e052fc3d6048ec24d3a34e23e1c972b10bc84
 size 238986036