diff --git "a/Qwen-2-05B-6Bits-MF.mlmodelc/model.mil" "b/Qwen-2-05B-6Bits-MF.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/Qwen-2-05B-6Bits-MF.mlmodelc/model.mil" @@ -0,0 +1,8684 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.3.14.1"}})] +{ + func length_1(tensor input_ids, state> key_cache_state, tensor query_pos1, state> value_cache_state) { + tensor expand_dims_0_axes_0 = const()[name = string("expand_dims_0_axes_0"), val = tensor([-1, -2])]; + tensor expand_dims_0 = expand_dims(axes = expand_dims_0_axes_0, x = query_pos1)[name = string("expand_dims_0")]; + tensor mask_gather_x_0 = const()[name = string("mask_gather_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + int32 mask_gather_axis_0 = const()[name = string("mask_gather_axis_0"), val = int32(0)]; + int32 mask_gather_batch_dims_0 = const()[name = string("mask_gather_batch_dims_0"), val = int32(0)]; + bool mask_gather_validate_indices_0 = const()[name = string("mask_gather_validate_indices_0"), val = bool(false)]; + tensor mask_gather = gather(axis = mask_gather_axis_0, batch_dims = mask_gather_batch_dims_0, indices = expand_dims_0, validate_indices = mask_gather_validate_indices_0, x = mask_gather_x_0)[name = string("mask_gather")]; + tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor query_sin_emb_x_0 = const()[name = string("query_sin_emb_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524416)))]; + int32 query_sin_emb_axis_0 = const()[name = string("query_sin_emb_axis_0"), val = int32(0)]; + int32 query_sin_emb_batch_dims_0 = const()[name = string("query_sin_emb_batch_dims_0"), val = int32(0)]; + bool query_sin_emb_validate_indices_0 = const()[name = string("query_sin_emb_validate_indices_0"), val = bool(false)]; + tensor query_sin_emb = gather(axis = query_sin_emb_axis_0, batch_dims = query_sin_emb_batch_dims_0, indices = expand_dims_0, validate_indices = query_sin_emb_validate_indices_0, x = query_sin_emb_x_0)[name = string("query_sin_emb")]; + tensor query_cos_emb_x_0 = const()[name = string("query_cos_emb_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(590016)))]; + int32 query_cos_emb_axis_0 = const()[name = string("query_cos_emb_axis_0"), val = int32(0)]; + int32 query_cos_emb_batch_dims_0 = const()[name = string("query_cos_emb_batch_dims_0"), val = int32(0)]; + bool query_cos_emb_validate_indices_0 = const()[name = string("query_cos_emb_validate_indices_0"), val = bool(false)]; + tensor query_cos_emb = gather(axis = query_cos_emb_axis_0, batch_dims = query_cos_emb_batch_dims_0, indices = expand_dims_0, validate_indices = query_cos_emb_validate_indices_0, x = query_cos_emb_x_0)[name = string("query_cos_emb")]; + tensor token_embedding_x_0 = const()[name = string("token_embedding_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(655616)))]; + int32 token_embedding_axis_0 = const()[name = string("token_embedding_axis_0"), val = int32(0)]; + int32 token_embedding_batch_dims_0 = const()[name = string("token_embedding_batch_dims_0"), val = int32(0)]; + bool token_embedding_validate_indices_0 = const()[name = string("token_embedding_validate_indices_0"), val = bool(false)]; + tensor token_embedding = gather(axis = token_embedding_axis_0, batch_dims = token_embedding_batch_dims_0, indices = input_ids, validate_indices = token_embedding_validate_indices_0, x = token_embedding_x_0)[name = string("token_embedding")]; + tensor input_embeddings_channels_first_perm_0 = const()[name = string("input_embeddings_channels_first_perm_0"), val = tensor([0, 2, 1])]; + int32 end_pos_0_x_0 = const()[name = string("end_pos_0_x_0"), val = int32(1)]; + tensor end_pos_0 = add(x = end_pos_0_x_0, y = query_pos1)[name = string("end_pos_0")]; + tensor read_state_0 = read_state(input = key_cache_state)[name = string("read_state_0")]; + tensor read_state_1 = read_state(input = value_cache_state)[name = string("read_state_1")]; + tensor input_embeddings_channels_first = transpose(perm = input_embeddings_channels_first_perm_0, x = token_embedding)[name = string("transpose_49")]; + tensor block_0_attention_rmsnorm_abs = abs(x = input_embeddings_channels_first)[name = string("block_0_attention_rmsnorm_abs")]; + tensor block_0_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_0_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_0_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_0_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_0_attention_rmsnorm_maxval = reduce_max(axes = block_0_attention_rmsnorm_maxval_axes_0, keep_dims = block_0_attention_rmsnorm_maxval_keep_dims_0, x = block_0_attention_rmsnorm_abs)[name = string("block_0_attention_rmsnorm_maxval")]; + fp16 block_0_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_0_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_0_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_0_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_0_attention_rmsnorm_maxval_clipped = clip(alpha = block_0_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_0_attention_rmsnorm_maxval_clipped_beta_0, x = block_0_attention_rmsnorm_maxval)[name = string("block_0_attention_rmsnorm_maxval_clipped")]; + tensor block_0_attention_rmsnorm_scaled = real_div(x = input_embeddings_channels_first, y = block_0_attention_rmsnorm_maxval_clipped)[name = string("block_0_attention_rmsnorm_scaled")]; + tensor block_0_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_0_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_0_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_0_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_0_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_0_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_0_attention_rmsnorm_squared_sum_keep_dims_0, x = block_0_attention_rmsnorm_scaled)[name = string("block_0_attention_rmsnorm_squared_sum")]; + fp16 block_0_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_0_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_0_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_0_attention_rmsnorm_rsqrt_epsilon_0, x = block_0_attention_rmsnorm_squared_sum)[name = string("block_0_attention_rmsnorm_rsqrt")]; + fp16 block_0_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_0_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_0_attention_rmsnorm_dim_scaled = mul(x = block_0_attention_rmsnorm_scaled, y = block_0_attention_rmsnorm_dim_scaled_y_0)[name = string("block_0_attention_rmsnorm_dim_scaled")]; + tensor block_0_attention_rmsnorm_normalized = mul(x = block_0_attention_rmsnorm_dim_scaled, y = block_0_attention_rmsnorm_rsqrt)[name = string("block_0_attention_rmsnorm_normalized")]; + tensor block_0_attention_rmsnorm_y_0 = const()[name = string("block_0_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272924992)))]; + tensor block_0_attention_rmsnorm = mul(x = block_0_attention_rmsnorm_normalized, y = block_0_attention_rmsnorm_y_0)[name = string("block_0_attention_rmsnorm")]; + tensor attention_0_qkvproj_weight_0 = const()[name = string("attention_0_qkvproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272926848)))]; + tensor attention_0_qkvproj_bias_0 = const()[name = string("attention_0_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274991296)))]; + tensor attention_0_qkvproj_strides_0 = const()[name = string("attention_0_qkvproj_strides_0"), val = tensor([1])]; + string attention_0_qkvproj_pad_type_0 = const()[name = string("attention_0_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_0_qkvproj_pad_0 = const()[name = string("attention_0_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_0_qkvproj_dilations_0 = const()[name = string("attention_0_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_0_qkvproj_groups_0 = const()[name = string("attention_0_qkvproj_groups_0"), val = int32(1)]; + tensor attention_0_qkvproj = conv(bias = attention_0_qkvproj_bias_0, dilations = attention_0_qkvproj_dilations_0, groups = attention_0_qkvproj_groups_0, pad = attention_0_qkvproj_pad_0, pad_type = attention_0_qkvproj_pad_type_0, strides = attention_0_qkvproj_strides_0, weight = attention_0_qkvproj_weight_0, x = block_0_attention_rmsnorm)[name = string("attention_0_qkvproj")]; + tensor attention_0_head_reshape_shape_0 = const()[name = string("attention_0_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_0_head_reshape = reshape(shape = attention_0_head_reshape_shape_0, x = attention_0_qkvproj)[name = string("attention_0_head_reshape")]; + tensor attention_0_head_transpose_perm_0 = const()[name = string("attention_0_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_0_split_qkv_heads_axis_0 = const()[name = string("attention_0_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_0_split_qkv_heads_split_sizes_0 = const()[name = string("attention_0_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_0_head_transpose = transpose(perm = attention_0_head_transpose_perm_0, x = attention_0_head_reshape)[name = string("transpose_48")]; + tensor attention_0_split_qkv_heads_0, tensor attention_0_split_qkv_heads_1, tensor attention_0_split_qkv_heads_2 = split(axis = attention_0_split_qkv_heads_axis_0, split_sizes = attention_0_split_qkv_heads_split_sizes_0, x = attention_0_head_transpose)[name = string("attention_0_split_qkv_heads")]; + tensor attention_0_q_rope_lhs_mult = mul(x = attention_0_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_0_q_rope_lhs_mult")]; + int32 attention_0_q_rotate_half_split_num_splits_0 = const()[name = string("attention_0_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_0_q_rotate_half_split_axis_0 = const()[name = string("attention_0_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_0_q_rotate_half_split_0, tensor attention_0_q_rotate_half_split_1 = split(axis = attention_0_q_rotate_half_split_axis_0, num_splits = attention_0_q_rotate_half_split_num_splits_0, x = attention_0_split_qkv_heads_0)[name = string("attention_0_q_rotate_half_split")]; + fp16 attention_0_q_rotate_half_neg_y_0 = const()[name = string("attention_0_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_0_q_rotate_half_neg = mul(x = attention_0_q_rotate_half_split_1, y = attention_0_q_rotate_half_neg_y_0)[name = string("attention_0_q_rotate_half_neg")]; + int32 attention_0_q_rotate_half_concat_axis_0 = const()[name = string("attention_0_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_0_q_rotate_half_concat_interleave_0 = const()[name = string("attention_0_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_0_q_rotate_half_concat = concat(axis = attention_0_q_rotate_half_concat_axis_0, interleave = attention_0_q_rotate_half_concat_interleave_0, values = (attention_0_q_rotate_half_neg, attention_0_q_rotate_half_split_0))[name = string("attention_0_q_rotate_half_concat")]; + tensor attention_0_q_rope_rhs_mult = mul(x = attention_0_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_0_q_rope_rhs_mult")]; + tensor attention_0_q_rope = add(x = attention_0_q_rope_lhs_mult, y = attention_0_q_rope_rhs_mult)[name = string("attention_0_q_rope")]; + tensor attention_0_k_rope_lhs_mult = mul(x = attention_0_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_0_k_rope_lhs_mult")]; + int32 attention_0_k_rotate_half_split_num_splits_0 = const()[name = string("attention_0_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_0_k_rotate_half_split_axis_0 = const()[name = string("attention_0_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_0_k_rotate_half_split_0, tensor attention_0_k_rotate_half_split_1 = split(axis = attention_0_k_rotate_half_split_axis_0, num_splits = attention_0_k_rotate_half_split_num_splits_0, x = attention_0_split_qkv_heads_1)[name = string("attention_0_k_rotate_half_split")]; + fp16 attention_0_k_rotate_half_neg_y_0 = const()[name = string("attention_0_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_0_k_rotate_half_neg = mul(x = attention_0_k_rotate_half_split_1, y = attention_0_k_rotate_half_neg_y_0)[name = string("attention_0_k_rotate_half_neg")]; + int32 attention_0_k_rotate_half_concat_axis_0 = const()[name = string("attention_0_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_0_k_rotate_half_concat_interleave_0 = const()[name = string("attention_0_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_0_k_rotate_half_concat = concat(axis = attention_0_k_rotate_half_concat_axis_0, interleave = attention_0_k_rotate_half_concat_interleave_0, values = (attention_0_k_rotate_half_neg, attention_0_k_rotate_half_split_0))[name = string("attention_0_k_rotate_half_concat")]; + tensor attention_0_k_rope_rhs_mult = mul(x = attention_0_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_0_k_rope_rhs_mult")]; + tensor attention_0_k_rope = add(x = attention_0_k_rope_lhs_mult, y = attention_0_k_rope_rhs_mult)[name = string("attention_0_k_rope")]; + int32 attention_0_q_splits_axis_0 = const()[name = string("attention_0_q_splits_axis_0"), val = int32(1)]; + int32 attention_0_q_splits_num_splits_0 = const()[name = string("attention_0_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_0_q_splits_0, tensor attention_0_q_splits_1 = split(axis = attention_0_q_splits_axis_0, num_splits = attention_0_q_splits_num_splits_0, x = attention_0_q_rope)[name = string("attention_0_q_splits")]; + tensor attention_0_update_begin_0_values0_0 = const()[name = string("attention_0_update_begin_0_values0_0"), val = tensor([0])]; + tensor attention_0_update_begin_0_values1_0 = const()[name = string("attention_0_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_0_update_begin_0_values3_0 = const()[name = string("attention_0_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_0_update_begin_0_axis_0 = const()[name = string("attention_0_update_begin_0_axis_0"), val = int32(0)]; + bool attention_0_update_begin_0_interleave_0 = const()[name = string("attention_0_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_0_update_begin_0 = concat(axis = attention_0_update_begin_0_axis_0, interleave = attention_0_update_begin_0_interleave_0, values = (attention_0_update_begin_0_values0_0, attention_0_update_begin_0_values1_0, query_pos1, attention_0_update_begin_0_values3_0))[name = string("attention_0_update_begin_0")]; + tensor attention_0_update_end_0_values0_0 = const()[name = string("attention_0_update_end_0_values0_0"), val = tensor([1])]; + tensor attention_0_update_end_0_values1_0 = const()[name = string("attention_0_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_0_update_end_0_values3_0 = const()[name = string("attention_0_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_0_update_end_0_axis_0 = const()[name = string("attention_0_update_end_0_axis_0"), val = int32(0)]; + bool attention_0_update_end_0_interleave_0 = const()[name = string("attention_0_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_0_update_end_0 = concat(axis = attention_0_update_end_0_axis_0, interleave = attention_0_update_end_0_interleave_0, values = (attention_0_update_end_0_values0_0, attention_0_update_end_0_values1_0, end_pos_0, attention_0_update_end_0_values3_0))[name = string("attention_0_update_end_0")]; + tensor attention_0_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_0_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_0_updated_key_cache_0 = slice_update(begin = attention_0_update_begin_0, end = attention_0_update_end_0, squeeze_mask = attention_0_updated_key_cache_0_squeeze_mask_0, update = attention_0_k_rope, x = read_state_0)[name = string("attention_0_updated_key_cache_0")]; + write_state(data = attention_0_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_0_write_state")]; + tensor coreml_update_state_0 = read_state(input = key_cache_state)[name = string("coreml_update_state_0")]; + tensor attention_0_key_cache_begin_0 = const()[name = string("attention_0_key_cache_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor attention_0_key_cache_end_0 = const()[name = string("attention_0_key_cache_end_0"), val = tensor([1, 2, 512, 64])]; + tensor attention_0_key_cache_squeeze_mask_0 = const()[name = string("attention_0_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_0_key_cache = slice_by_index(begin = attention_0_key_cache_begin_0, end = attention_0_key_cache_end_0, squeeze_mask = attention_0_key_cache_squeeze_mask_0, x = coreml_update_state_0)[name = string("attention_0_key_cache")]; + int32 attention_0_key_cache_head_axis_0 = const()[name = string("attention_0_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_0_key_cache_head_num_splits_0 = const()[name = string("attention_0_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_0_key_cache_head_0, tensor attention_0_key_cache_head_1 = split(axis = attention_0_key_cache_head_axis_0, num_splits = attention_0_key_cache_head_num_splits_0, x = attention_0_key_cache)[name = string("attention_0_key_cache_head")]; + tensor attention_0_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_0_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_0_updated_value_cache_0 = slice_update(begin = attention_0_update_begin_0, end = attention_0_update_end_0, squeeze_mask = attention_0_updated_value_cache_0_squeeze_mask_0, update = attention_0_split_qkv_heads_2, x = read_state_1)[name = string("attention_0_updated_value_cache_0")]; + write_state(data = attention_0_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_1_write_state")]; + tensor coreml_update_state_1 = read_state(input = value_cache_state)[name = string("coreml_update_state_1")]; + tensor attention_0_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_0_slice_current_layer_value_cache_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor attention_0_slice_current_layer_value_cache_end_0 = const()[name = string("attention_0_slice_current_layer_value_cache_end_0"), val = tensor([1, 2, 512, 64])]; + tensor attention_0_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_0_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_0_slice_current_layer_value_cache = slice_by_index(begin = attention_0_slice_current_layer_value_cache_begin_0, end = attention_0_slice_current_layer_value_cache_end_0, squeeze_mask = attention_0_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_1)[name = string("attention_0_slice_current_layer_value_cache")]; + int32 attention_0_slice_value_cache_heads_axis_0 = const()[name = string("attention_0_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_0_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_0_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_0_slice_value_cache_heads_0, tensor attention_0_slice_value_cache_heads_1 = split(axis = attention_0_slice_value_cache_heads_axis_0, num_splits = attention_0_slice_value_cache_heads_num_splits_0, x = attention_0_slice_current_layer_value_cache)[name = string("attention_0_slice_value_cache_heads")]; + bool attention_0_scores_0_transpose_y_0 = const()[name = string("attention_0_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_0_scores_0_transpose_x_0 = const()[name = string("attention_0_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_0_scores_0 = matmul(transpose_x = attention_0_scores_0_transpose_x_0, transpose_y = attention_0_scores_0_transpose_y_0, x = attention_0_key_cache_head_0, y = attention_0_q_splits_0)[name = string("attention_0_scores_0")]; + fp16 attention_0_scaled_scores_0_y_0 = const()[name = string("attention_0_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_0_scaled_scores_0 = mul(x = attention_0_scores_0, y = attention_0_scaled_scores_0_y_0)[name = string("attention_0_scaled_scores_0")]; + tensor transpose_0 = transpose(perm = transpose_0_perm_0, x = mask_gather)[name = string("transpose_50")]; + tensor attention_0_masked_scaled_scores_0 = add(x = attention_0_scaled_scores_0, y = transpose_0)[name = string("attention_0_masked_scaled_scores_0")]; + int32 softmax_0_axis_0 = const()[name = string("softmax_0_axis_0"), val = int32(-2)]; + tensor softmax_0 = softmax(axis = softmax_0_axis_0, x = attention_0_masked_scaled_scores_0)[name = string("softmax_0")]; + bool attention_0_attention_0_transpose_x_0 = const()[name = string("attention_0_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_0_attention_0_transpose_y_0 = const()[name = string("attention_0_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_0_attention_0 = matmul(transpose_x = attention_0_attention_0_transpose_x_0, transpose_y = attention_0_attention_0_transpose_y_0, x = softmax_0, y = attention_0_slice_value_cache_heads_0)[name = string("attention_0_attention_0")]; + bool attention_0_scores_1_transpose_y_0 = const()[name = string("attention_0_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_0_scores_1_transpose_x_0 = const()[name = string("attention_0_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_0_scores_1 = matmul(transpose_x = attention_0_scores_1_transpose_x_0, transpose_y = attention_0_scores_1_transpose_y_0, x = attention_0_key_cache_head_1, y = attention_0_q_splits_1)[name = string("attention_0_scores_1")]; + fp16 attention_0_scaled_scores_1_y_0 = const()[name = string("attention_0_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_0_scaled_scores_1 = mul(x = attention_0_scores_1, y = attention_0_scaled_scores_1_y_0)[name = string("attention_0_scaled_scores_1")]; + tensor attention_0_masked_scaled_scores_1 = add(x = attention_0_scaled_scores_1, y = transpose_0)[name = string("attention_0_masked_scaled_scores_1")]; + int32 softmax_1_axis_0 = const()[name = string("softmax_1_axis_0"), val = int32(-2)]; + tensor softmax_1 = softmax(axis = softmax_1_axis_0, x = attention_0_masked_scaled_scores_1)[name = string("softmax_1")]; + bool attention_0_attention_1_transpose_x_0 = const()[name = string("attention_0_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_0_attention_1_transpose_y_0 = const()[name = string("attention_0_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_0_attention_1 = matmul(transpose_x = attention_0_attention_1_transpose_x_0, transpose_y = attention_0_attention_1_transpose_y_0, x = softmax_1, y = attention_0_slice_value_cache_heads_1)[name = string("attention_0_attention_1")]; + int32 attention_0_concat_attention_all_heads_axis_0 = const()[name = string("attention_0_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_0_concat_attention_all_heads_interleave_0 = const()[name = string("attention_0_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_0_concat_attention_all_heads = concat(axis = attention_0_concat_attention_all_heads_axis_0, interleave = attention_0_concat_attention_all_heads_interleave_0, values = (attention_0_attention_0, attention_0_attention_1))[name = string("attention_0_concat_attention_all_heads")]; + tensor attention_0_channels_first_retransposed_perm_0 = const()[name = string("attention_0_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_0_reshaped_shape_0 = const()[name = string("attention_0_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_0_channels_first_retransposed = transpose(perm = attention_0_channels_first_retransposed_perm_0, x = attention_0_concat_attention_all_heads)[name = string("transpose_47")]; + tensor attention_0_reshaped = reshape(shape = attention_0_reshaped_shape_0, x = attention_0_channels_first_retransposed)[name = string("attention_0_reshaped")]; + tensor attention_0_outproj_weight_0 = const()[name = string("attention_0_outproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274993664)))]; + tensor attention_0_outproj_strides_0 = const()[name = string("attention_0_outproj_strides_0"), val = tensor([1])]; + string attention_0_outproj_pad_type_0 = const()[name = string("attention_0_outproj_pad_type_0"), val = string("valid")]; + tensor attention_0_outproj_pad_0 = const()[name = string("attention_0_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_0_outproj_dilations_0 = const()[name = string("attention_0_outproj_dilations_0"), val = tensor([1])]; + int32 attention_0_outproj_groups_0 = const()[name = string("attention_0_outproj_groups_0"), val = int32(1)]; + tensor attention_0_outproj = conv(dilations = attention_0_outproj_dilations_0, groups = attention_0_outproj_groups_0, pad = attention_0_outproj_pad_0, pad_type = attention_0_outproj_pad_type_0, strides = attention_0_outproj_strides_0, weight = attention_0_outproj_weight_0, x = attention_0_reshaped)[name = string("attention_0_outproj")]; + tensor block_0_residual_1 = add(x = input_embeddings_channels_first, y = attention_0_outproj)[name = string("block_0_residual_1")]; + tensor block_0_ffn_rmsnorm_abs = abs(x = block_0_residual_1)[name = string("block_0_ffn_rmsnorm_abs")]; + tensor block_0_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_0_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_0_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_0_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_0_ffn_rmsnorm_maxval = reduce_max(axes = block_0_ffn_rmsnorm_maxval_axes_0, keep_dims = block_0_ffn_rmsnorm_maxval_keep_dims_0, x = block_0_ffn_rmsnorm_abs)[name = string("block_0_ffn_rmsnorm_maxval")]; + fp16 block_0_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_0_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_0_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_0_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_0_ffn_rmsnorm_maxval_clipped = clip(alpha = block_0_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_0_ffn_rmsnorm_maxval_clipped_beta_0, x = block_0_ffn_rmsnorm_maxval)[name = string("block_0_ffn_rmsnorm_maxval_clipped")]; + tensor block_0_ffn_rmsnorm_scaled = real_div(x = block_0_residual_1, y = block_0_ffn_rmsnorm_maxval_clipped)[name = string("block_0_ffn_rmsnorm_scaled")]; + tensor block_0_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_0_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_0_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_0_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_0_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_0_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_0_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_0_ffn_rmsnorm_scaled)[name = string("block_0_ffn_rmsnorm_squared_sum")]; + fp16 block_0_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_0_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_0_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_0_ffn_rmsnorm_rsqrt_epsilon_0, x = block_0_ffn_rmsnorm_squared_sum)[name = string("block_0_ffn_rmsnorm_rsqrt")]; + fp16 block_0_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_0_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_0_ffn_rmsnorm_dim_scaled = mul(x = block_0_ffn_rmsnorm_scaled, y = block_0_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_0_ffn_rmsnorm_dim_scaled")]; + tensor block_0_ffn_rmsnorm_normalized = mul(x = block_0_ffn_rmsnorm_dim_scaled, y = block_0_ffn_rmsnorm_rsqrt)[name = string("block_0_ffn_rmsnorm_normalized")]; + tensor block_0_ffn_rmsnorm_y_0 = const()[name = string("block_0_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276599360)))]; + tensor block_0_ffn_rmsnorm = mul(x = block_0_ffn_rmsnorm_normalized, y = block_0_ffn_rmsnorm_y_0)[name = string("block_0_ffn_rmsnorm")]; + tensor block_0_ffn_inproj_weight_0 = const()[name = string("block_0_ffn_inproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276601216)))]; + tensor block_0_ffn_inproj_strides_0 = const()[name = string("block_0_ffn_inproj_strides_0"), val = tensor([1])]; + string block_0_ffn_inproj_pad_type_0 = const()[name = string("block_0_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_0_ffn_inproj_pad_0 = const()[name = string("block_0_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_0_ffn_inproj_dilations_0 = const()[name = string("block_0_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_0_ffn_inproj_groups_0 = const()[name = string("block_0_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_0_ffn_inproj = conv(dilations = block_0_ffn_inproj_dilations_0, groups = block_0_ffn_inproj_groups_0, pad = block_0_ffn_inproj_pad_0, pad_type = block_0_ffn_inproj_pad_type_0, strides = block_0_ffn_inproj_strides_0, weight = block_0_ffn_inproj_weight_0, x = block_0_ffn_rmsnorm)[name = string("block_0_ffn_inproj")]; + tensor block_0_ffn_g_weight_0 = const()[name = string("block_0_ffn_g_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285317568)))]; + tensor block_0_ffn_g_strides_0 = const()[name = string("block_0_ffn_g_strides_0"), val = tensor([1])]; + string block_0_ffn_g_pad_type_0 = const()[name = string("block_0_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_0_ffn_g_pad_0 = const()[name = string("block_0_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_0_ffn_g_dilations_0 = const()[name = string("block_0_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_0_ffn_g_groups_0 = const()[name = string("block_0_ffn_g_groups_0"), val = int32(1)]; + tensor block_0_ffn_g = conv(dilations = block_0_ffn_g_dilations_0, groups = block_0_ffn_g_groups_0, pad = block_0_ffn_g_pad_0, pad_type = block_0_ffn_g_pad_type_0, strides = block_0_ffn_g_strides_0, weight = block_0_ffn_g_weight_0, x = block_0_ffn_rmsnorm)[name = string("block_0_ffn_g")]; + tensor block_0_ffn_g_activation = silu(x = block_0_ffn_g)[name = string("block_0_ffn_g_activation")]; + tensor block_0_ffn_x_gated = mul(x = block_0_ffn_inproj, y = block_0_ffn_g_activation)[name = string("block_0_ffn_x_gated")]; + tensor block_0_ffn_outproj_weight_0 = const()[name = string("block_0_ffn_outproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294033920)))]; + tensor block_0_ffn_outproj_strides_0 = const()[name = string("block_0_ffn_outproj_strides_0"), val = tensor([1])]; + string block_0_ffn_outproj_pad_type_0 = const()[name = string("block_0_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_0_ffn_outproj_pad_0 = const()[name = string("block_0_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_0_ffn_outproj_dilations_0 = const()[name = string("block_0_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_0_ffn_outproj_groups_0 = const()[name = string("block_0_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_0_ffn_outproj = conv(dilations = block_0_ffn_outproj_dilations_0, groups = block_0_ffn_outproj_groups_0, pad = block_0_ffn_outproj_pad_0, pad_type = block_0_ffn_outproj_pad_type_0, strides = block_0_ffn_outproj_strides_0, weight = block_0_ffn_outproj_weight_0, x = block_0_ffn_x_gated)[name = string("block_0_ffn_outproj")]; + tensor block_0_residual_2 = add(x = block_0_ffn_outproj, y = block_0_residual_1)[name = string("block_0_residual_2")]; + tensor block_1_attention_rmsnorm_abs = abs(x = block_0_residual_2)[name = string("block_1_attention_rmsnorm_abs")]; + tensor block_1_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_1_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_1_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_1_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_1_attention_rmsnorm_maxval = reduce_max(axes = block_1_attention_rmsnorm_maxval_axes_0, keep_dims = block_1_attention_rmsnorm_maxval_keep_dims_0, x = block_1_attention_rmsnorm_abs)[name = string("block_1_attention_rmsnorm_maxval")]; + fp16 block_1_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_1_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_1_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_1_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_1_attention_rmsnorm_maxval_clipped = clip(alpha = block_1_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_1_attention_rmsnorm_maxval_clipped_beta_0, x = block_1_attention_rmsnorm_maxval)[name = string("block_1_attention_rmsnorm_maxval_clipped")]; + tensor block_1_attention_rmsnorm_scaled = real_div(x = block_0_residual_2, y = block_1_attention_rmsnorm_maxval_clipped)[name = string("block_1_attention_rmsnorm_scaled")]; + tensor block_1_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_1_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_1_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_1_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_1_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_1_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_1_attention_rmsnorm_squared_sum_keep_dims_0, x = block_1_attention_rmsnorm_scaled)[name = string("block_1_attention_rmsnorm_squared_sum")]; + fp16 block_1_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_1_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_1_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_1_attention_rmsnorm_rsqrt_epsilon_0, x = block_1_attention_rmsnorm_squared_sum)[name = string("block_1_attention_rmsnorm_rsqrt")]; + fp16 block_1_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_1_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_1_attention_rmsnorm_dim_scaled = mul(x = block_1_attention_rmsnorm_scaled, y = block_1_attention_rmsnorm_dim_scaled_y_0)[name = string("block_1_attention_rmsnorm_dim_scaled")]; + tensor block_1_attention_rmsnorm_normalized = mul(x = block_1_attention_rmsnorm_dim_scaled, y = block_1_attention_rmsnorm_rsqrt)[name = string("block_1_attention_rmsnorm_normalized")]; + tensor block_1_attention_rmsnorm_y_0 = const()[name = string("block_1_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302750272)))]; + tensor block_1_attention_rmsnorm = mul(x = block_1_attention_rmsnorm_normalized, y = block_1_attention_rmsnorm_y_0)[name = string("block_1_attention_rmsnorm")]; + tensor attention_1_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302752128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303526336))))[name = string("attention_1_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_0 = constexpr_blockwise_shift_scale(data = attention_1_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303563264))))[name = string("constexpr_blockwise_shift_scale_0")]; + tensor attention_1_qkvproj_bias_0 = const()[name = string("attention_1_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303565632)))]; + tensor attention_1_qkvproj_strides_0 = const()[name = string("attention_1_qkvproj_strides_0"), val = tensor([1])]; + string attention_1_qkvproj_pad_type_0 = const()[name = string("attention_1_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_1_qkvproj_pad_0 = const()[name = string("attention_1_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_1_qkvproj_dilations_0 = const()[name = string("attention_1_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_1_qkvproj_groups_0 = const()[name = string("attention_1_qkvproj_groups_0"), val = int32(1)]; + tensor attention_1_qkvproj = conv(bias = attention_1_qkvproj_bias_0, dilations = attention_1_qkvproj_dilations_0, groups = attention_1_qkvproj_groups_0, pad = attention_1_qkvproj_pad_0, pad_type = attention_1_qkvproj_pad_type_0, strides = attention_1_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_0, x = block_1_attention_rmsnorm)[name = string("attention_1_qkvproj")]; + tensor attention_1_head_reshape_shape_0 = const()[name = string("attention_1_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_1_head_reshape = reshape(shape = attention_1_head_reshape_shape_0, x = attention_1_qkvproj)[name = string("attention_1_head_reshape")]; + tensor attention_1_head_transpose_perm_0 = const()[name = string("attention_1_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_1_split_qkv_heads_axis_0 = const()[name = string("attention_1_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_1_split_qkv_heads_split_sizes_0 = const()[name = string("attention_1_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_1_head_transpose = transpose(perm = attention_1_head_transpose_perm_0, x = attention_1_head_reshape)[name = string("transpose_46")]; + tensor attention_1_split_qkv_heads_0, tensor attention_1_split_qkv_heads_1, tensor attention_1_split_qkv_heads_2 = split(axis = attention_1_split_qkv_heads_axis_0, split_sizes = attention_1_split_qkv_heads_split_sizes_0, x = attention_1_head_transpose)[name = string("attention_1_split_qkv_heads")]; + tensor attention_1_q_rope_lhs_mult = mul(x = attention_1_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_1_q_rope_lhs_mult")]; + int32 attention_1_q_rotate_half_split_num_splits_0 = const()[name = string("attention_1_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_1_q_rotate_half_split_axis_0 = const()[name = string("attention_1_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_1_q_rotate_half_split_0, tensor attention_1_q_rotate_half_split_1 = split(axis = attention_1_q_rotate_half_split_axis_0, num_splits = attention_1_q_rotate_half_split_num_splits_0, x = attention_1_split_qkv_heads_0)[name = string("attention_1_q_rotate_half_split")]; + fp16 attention_1_q_rotate_half_neg_y_0 = const()[name = string("attention_1_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_1_q_rotate_half_neg = mul(x = attention_1_q_rotate_half_split_1, y = attention_1_q_rotate_half_neg_y_0)[name = string("attention_1_q_rotate_half_neg")]; + int32 attention_1_q_rotate_half_concat_axis_0 = const()[name = string("attention_1_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_1_q_rotate_half_concat_interleave_0 = const()[name = string("attention_1_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_1_q_rotate_half_concat = concat(axis = attention_1_q_rotate_half_concat_axis_0, interleave = attention_1_q_rotate_half_concat_interleave_0, values = (attention_1_q_rotate_half_neg, attention_1_q_rotate_half_split_0))[name = string("attention_1_q_rotate_half_concat")]; + tensor attention_1_q_rope_rhs_mult = mul(x = attention_1_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_1_q_rope_rhs_mult")]; + tensor attention_1_q_rope = add(x = attention_1_q_rope_lhs_mult, y = attention_1_q_rope_rhs_mult)[name = string("attention_1_q_rope")]; + tensor attention_1_k_rope_lhs_mult = mul(x = attention_1_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_1_k_rope_lhs_mult")]; + int32 attention_1_k_rotate_half_split_num_splits_0 = const()[name = string("attention_1_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_1_k_rotate_half_split_axis_0 = const()[name = string("attention_1_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_1_k_rotate_half_split_0, tensor attention_1_k_rotate_half_split_1 = split(axis = attention_1_k_rotate_half_split_axis_0, num_splits = attention_1_k_rotate_half_split_num_splits_0, x = attention_1_split_qkv_heads_1)[name = string("attention_1_k_rotate_half_split")]; + fp16 attention_1_k_rotate_half_neg_y_0 = const()[name = string("attention_1_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_1_k_rotate_half_neg = mul(x = attention_1_k_rotate_half_split_1, y = attention_1_k_rotate_half_neg_y_0)[name = string("attention_1_k_rotate_half_neg")]; + int32 attention_1_k_rotate_half_concat_axis_0 = const()[name = string("attention_1_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_1_k_rotate_half_concat_interleave_0 = const()[name = string("attention_1_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_1_k_rotate_half_concat = concat(axis = attention_1_k_rotate_half_concat_axis_0, interleave = attention_1_k_rotate_half_concat_interleave_0, values = (attention_1_k_rotate_half_neg, attention_1_k_rotate_half_split_0))[name = string("attention_1_k_rotate_half_concat")]; + tensor attention_1_k_rope_rhs_mult = mul(x = attention_1_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_1_k_rope_rhs_mult")]; + tensor attention_1_k_rope = add(x = attention_1_k_rope_lhs_mult, y = attention_1_k_rope_rhs_mult)[name = string("attention_1_k_rope")]; + int32 attention_1_q_splits_axis_0 = const()[name = string("attention_1_q_splits_axis_0"), val = int32(1)]; + int32 attention_1_q_splits_num_splits_0 = const()[name = string("attention_1_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_1_q_splits_0, tensor attention_1_q_splits_1 = split(axis = attention_1_q_splits_axis_0, num_splits = attention_1_q_splits_num_splits_0, x = attention_1_q_rope)[name = string("attention_1_q_splits")]; + tensor attention_1_update_begin_0_values0_0 = const()[name = string("attention_1_update_begin_0_values0_0"), val = tensor([1])]; + tensor attention_1_update_begin_0_values1_0 = const()[name = string("attention_1_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_1_update_begin_0_values3_0 = const()[name = string("attention_1_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_1_update_begin_0_axis_0 = const()[name = string("attention_1_update_begin_0_axis_0"), val = int32(0)]; + bool attention_1_update_begin_0_interleave_0 = const()[name = string("attention_1_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_1_update_begin_0 = concat(axis = attention_1_update_begin_0_axis_0, interleave = attention_1_update_begin_0_interleave_0, values = (attention_1_update_begin_0_values0_0, attention_1_update_begin_0_values1_0, query_pos1, attention_1_update_begin_0_values3_0))[name = string("attention_1_update_begin_0")]; + tensor attention_1_update_end_0_values0_0 = const()[name = string("attention_1_update_end_0_values0_0"), val = tensor([2])]; + tensor attention_1_update_end_0_values1_0 = const()[name = string("attention_1_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_1_update_end_0_values3_0 = const()[name = string("attention_1_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_1_update_end_0_axis_0 = const()[name = string("attention_1_update_end_0_axis_0"), val = int32(0)]; + bool attention_1_update_end_0_interleave_0 = const()[name = string("attention_1_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_1_update_end_0 = concat(axis = attention_1_update_end_0_axis_0, interleave = attention_1_update_end_0_interleave_0, values = (attention_1_update_end_0_values0_0, attention_1_update_end_0_values1_0, end_pos_0, attention_1_update_end_0_values3_0))[name = string("attention_1_update_end_0")]; + tensor attention_1_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_1_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_1_updated_key_cache_0 = slice_update(begin = attention_1_update_begin_0, end = attention_1_update_end_0, squeeze_mask = attention_1_updated_key_cache_0_squeeze_mask_0, update = attention_1_k_rope, x = coreml_update_state_0)[name = string("attention_1_updated_key_cache_0")]; + write_state(data = attention_1_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_2_write_state")]; + tensor coreml_update_state_2 = read_state(input = key_cache_state)[name = string("coreml_update_state_2")]; + tensor attention_1_key_cache_begin_0 = const()[name = string("attention_1_key_cache_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor attention_1_key_cache_end_0 = const()[name = string("attention_1_key_cache_end_0"), val = tensor([2, 2, 512, 64])]; + tensor attention_1_key_cache_squeeze_mask_0 = const()[name = string("attention_1_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_1_key_cache = slice_by_index(begin = attention_1_key_cache_begin_0, end = attention_1_key_cache_end_0, squeeze_mask = attention_1_key_cache_squeeze_mask_0, x = coreml_update_state_2)[name = string("attention_1_key_cache")]; + int32 attention_1_key_cache_head_axis_0 = const()[name = string("attention_1_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_1_key_cache_head_num_splits_0 = const()[name = string("attention_1_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_1_key_cache_head_0, tensor attention_1_key_cache_head_1 = split(axis = attention_1_key_cache_head_axis_0, num_splits = attention_1_key_cache_head_num_splits_0, x = attention_1_key_cache)[name = string("attention_1_key_cache_head")]; + tensor attention_1_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_1_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_1_updated_value_cache_0 = slice_update(begin = attention_1_update_begin_0, end = attention_1_update_end_0, squeeze_mask = attention_1_updated_value_cache_0_squeeze_mask_0, update = attention_1_split_qkv_heads_2, x = coreml_update_state_1)[name = string("attention_1_updated_value_cache_0")]; + write_state(data = attention_1_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_3_write_state")]; + tensor coreml_update_state_3 = read_state(input = value_cache_state)[name = string("coreml_update_state_3")]; + tensor attention_1_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_1_slice_current_layer_value_cache_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor attention_1_slice_current_layer_value_cache_end_0 = const()[name = string("attention_1_slice_current_layer_value_cache_end_0"), val = tensor([2, 2, 512, 64])]; + tensor attention_1_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_1_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_1_slice_current_layer_value_cache = slice_by_index(begin = attention_1_slice_current_layer_value_cache_begin_0, end = attention_1_slice_current_layer_value_cache_end_0, squeeze_mask = attention_1_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_3)[name = string("attention_1_slice_current_layer_value_cache")]; + int32 attention_1_slice_value_cache_heads_axis_0 = const()[name = string("attention_1_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_1_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_1_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_1_slice_value_cache_heads_0, tensor attention_1_slice_value_cache_heads_1 = split(axis = attention_1_slice_value_cache_heads_axis_0, num_splits = attention_1_slice_value_cache_heads_num_splits_0, x = attention_1_slice_current_layer_value_cache)[name = string("attention_1_slice_value_cache_heads")]; + bool attention_1_scores_0_transpose_y_0 = const()[name = string("attention_1_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_1_scores_0_transpose_x_0 = const()[name = string("attention_1_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_1_scores_0 = matmul(transpose_x = attention_1_scores_0_transpose_x_0, transpose_y = attention_1_scores_0_transpose_y_0, x = attention_1_key_cache_head_0, y = attention_1_q_splits_0)[name = string("attention_1_scores_0")]; + fp16 attention_1_scaled_scores_0_y_0 = const()[name = string("attention_1_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_1_scaled_scores_0 = mul(x = attention_1_scores_0, y = attention_1_scaled_scores_0_y_0)[name = string("attention_1_scaled_scores_0")]; + tensor attention_1_masked_scaled_scores_0 = add(x = attention_1_scaled_scores_0, y = transpose_0)[name = string("attention_1_masked_scaled_scores_0")]; + int32 softmax_2_axis_0 = const()[name = string("softmax_2_axis_0"), val = int32(-2)]; + tensor softmax_2 = softmax(axis = softmax_2_axis_0, x = attention_1_masked_scaled_scores_0)[name = string("softmax_2")]; + bool attention_1_attention_0_transpose_x_0 = const()[name = string("attention_1_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_1_attention_0_transpose_y_0 = const()[name = string("attention_1_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_1_attention_0 = matmul(transpose_x = attention_1_attention_0_transpose_x_0, transpose_y = attention_1_attention_0_transpose_y_0, x = softmax_2, y = attention_1_slice_value_cache_heads_0)[name = string("attention_1_attention_0")]; + bool attention_1_scores_1_transpose_y_0 = const()[name = string("attention_1_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_1_scores_1_transpose_x_0 = const()[name = string("attention_1_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_1_scores_1 = matmul(transpose_x = attention_1_scores_1_transpose_x_0, transpose_y = attention_1_scores_1_transpose_y_0, x = attention_1_key_cache_head_1, y = attention_1_q_splits_1)[name = string("attention_1_scores_1")]; + fp16 attention_1_scaled_scores_1_y_0 = const()[name = string("attention_1_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_1_scaled_scores_1 = mul(x = attention_1_scores_1, y = attention_1_scaled_scores_1_y_0)[name = string("attention_1_scaled_scores_1")]; + tensor attention_1_masked_scaled_scores_1 = add(x = attention_1_scaled_scores_1, y = transpose_0)[name = string("attention_1_masked_scaled_scores_1")]; + int32 softmax_3_axis_0 = const()[name = string("softmax_3_axis_0"), val = int32(-2)]; + tensor softmax_3 = softmax(axis = softmax_3_axis_0, x = attention_1_masked_scaled_scores_1)[name = string("softmax_3")]; + bool attention_1_attention_1_transpose_x_0 = const()[name = string("attention_1_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_1_attention_1_transpose_y_0 = const()[name = string("attention_1_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_1_attention_1 = matmul(transpose_x = attention_1_attention_1_transpose_x_0, transpose_y = attention_1_attention_1_transpose_y_0, x = softmax_3, y = attention_1_slice_value_cache_heads_1)[name = string("attention_1_attention_1")]; + int32 attention_1_concat_attention_all_heads_axis_0 = const()[name = string("attention_1_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_1_concat_attention_all_heads_interleave_0 = const()[name = string("attention_1_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_1_concat_attention_all_heads = concat(axis = attention_1_concat_attention_all_heads_axis_0, interleave = attention_1_concat_attention_all_heads_interleave_0, values = (attention_1_attention_0, attention_1_attention_1))[name = string("attention_1_concat_attention_all_heads")]; + tensor attention_1_channels_first_retransposed_perm_0 = const()[name = string("attention_1_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_1_reshaped_shape_0 = const()[name = string("attention_1_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_1_channels_first_retransposed = transpose(perm = attention_1_channels_first_retransposed_perm_0, x = attention_1_concat_attention_all_heads)[name = string("transpose_45")]; + tensor attention_1_reshaped = reshape(shape = attention_1_reshaped_shape_0, x = attention_1_channels_first_retransposed)[name = string("attention_1_reshaped")]; + tensor attention_1_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303568000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304170176))))[name = string("attention_1_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_1 = constexpr_blockwise_shift_scale(data = attention_1_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304198912))))[name = string("constexpr_blockwise_shift_scale_1")]; + tensor attention_1_outproj_strides_0 = const()[name = string("attention_1_outproj_strides_0"), val = tensor([1])]; + string attention_1_outproj_pad_type_0 = const()[name = string("attention_1_outproj_pad_type_0"), val = string("valid")]; + tensor attention_1_outproj_pad_0 = const()[name = string("attention_1_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_1_outproj_dilations_0 = const()[name = string("attention_1_outproj_dilations_0"), val = tensor([1])]; + int32 attention_1_outproj_groups_0 = const()[name = string("attention_1_outproj_groups_0"), val = int32(1)]; + tensor attention_1_outproj = conv(dilations = attention_1_outproj_dilations_0, groups = attention_1_outproj_groups_0, pad = attention_1_outproj_pad_0, pad_type = attention_1_outproj_pad_type_0, strides = attention_1_outproj_strides_0, weight = constexpr_blockwise_shift_scale_1, x = attention_1_reshaped)[name = string("attention_1_outproj")]; + tensor block_1_residual_1 = add(x = block_0_residual_2, y = attention_1_outproj)[name = string("block_1_residual_1")]; + tensor block_1_ffn_rmsnorm_abs = abs(x = block_1_residual_1)[name = string("block_1_ffn_rmsnorm_abs")]; + tensor block_1_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_1_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_1_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_1_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_1_ffn_rmsnorm_maxval = reduce_max(axes = block_1_ffn_rmsnorm_maxval_axes_0, keep_dims = block_1_ffn_rmsnorm_maxval_keep_dims_0, x = block_1_ffn_rmsnorm_abs)[name = string("block_1_ffn_rmsnorm_maxval")]; + fp16 block_1_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_1_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_1_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_1_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_1_ffn_rmsnorm_maxval_clipped = clip(alpha = block_1_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_1_ffn_rmsnorm_maxval_clipped_beta_0, x = block_1_ffn_rmsnorm_maxval)[name = string("block_1_ffn_rmsnorm_maxval_clipped")]; + tensor block_1_ffn_rmsnorm_scaled = real_div(x = block_1_residual_1, y = block_1_ffn_rmsnorm_maxval_clipped)[name = string("block_1_ffn_rmsnorm_scaled")]; + tensor block_1_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_1_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_1_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_1_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_1_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_1_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_1_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_1_ffn_rmsnorm_scaled)[name = string("block_1_ffn_rmsnorm_squared_sum")]; + fp16 block_1_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_1_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_1_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_1_ffn_rmsnorm_rsqrt_epsilon_0, x = block_1_ffn_rmsnorm_squared_sum)[name = string("block_1_ffn_rmsnorm_rsqrt")]; + fp16 block_1_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_1_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_1_ffn_rmsnorm_dim_scaled = mul(x = block_1_ffn_rmsnorm_scaled, y = block_1_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_1_ffn_rmsnorm_dim_scaled")]; + tensor block_1_ffn_rmsnorm_normalized = mul(x = block_1_ffn_rmsnorm_dim_scaled, y = block_1_ffn_rmsnorm_rsqrt)[name = string("block_1_ffn_rmsnorm_normalized")]; + tensor block_1_ffn_rmsnorm_y_0 = const()[name = string("block_1_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304200768)))]; + tensor block_1_ffn_rmsnorm = mul(x = block_1_ffn_rmsnorm_normalized, y = block_1_ffn_rmsnorm_y_0)[name = string("block_1_ffn_rmsnorm")]; + tensor block_1_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304202624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307471296))))[name = string("block_1_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_2 = constexpr_blockwise_shift_scale(data = block_1_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307627008))))[name = string("constexpr_blockwise_shift_scale_2")]; + tensor block_1_ffn_inproj_strides_0 = const()[name = string("block_1_ffn_inproj_strides_0"), val = tensor([1])]; + string block_1_ffn_inproj_pad_type_0 = const()[name = string("block_1_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_1_ffn_inproj_pad_0 = const()[name = string("block_1_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_1_ffn_inproj_dilations_0 = const()[name = string("block_1_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_1_ffn_inproj_groups_0 = const()[name = string("block_1_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_1_ffn_inproj = conv(dilations = block_1_ffn_inproj_dilations_0, groups = block_1_ffn_inproj_groups_0, pad = block_1_ffn_inproj_pad_0, pad_type = block_1_ffn_inproj_pad_type_0, strides = block_1_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_2, x = block_1_ffn_rmsnorm)[name = string("block_1_ffn_inproj")]; + tensor block_1_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307636800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(310905472))))[name = string("block_1_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_3 = constexpr_blockwise_shift_scale(data = block_1_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311061184))))[name = string("constexpr_blockwise_shift_scale_3")]; + tensor block_1_ffn_g_strides_0 = const()[name = string("block_1_ffn_g_strides_0"), val = tensor([1])]; + string block_1_ffn_g_pad_type_0 = const()[name = string("block_1_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_1_ffn_g_pad_0 = const()[name = string("block_1_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_1_ffn_g_dilations_0 = const()[name = string("block_1_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_1_ffn_g_groups_0 = const()[name = string("block_1_ffn_g_groups_0"), val = int32(1)]; + tensor block_1_ffn_g = conv(dilations = block_1_ffn_g_dilations_0, groups = block_1_ffn_g_groups_0, pad = block_1_ffn_g_pad_0, pad_type = block_1_ffn_g_pad_type_0, strides = block_1_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_3, x = block_1_ffn_rmsnorm)[name = string("block_1_ffn_g")]; + tensor block_1_ffn_g_activation = silu(x = block_1_ffn_g)[name = string("block_1_ffn_g_activation")]; + tensor block_1_ffn_x_gated = mul(x = block_1_ffn_inproj, y = block_1_ffn_g_activation)[name = string("block_1_ffn_x_gated")]; + tensor block_1_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311070976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314339648))))[name = string("block_1_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_4 = constexpr_blockwise_shift_scale(data = block_1_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314368384))))[name = string("constexpr_blockwise_shift_scale_4")]; + tensor block_1_ffn_outproj_strides_0 = const()[name = string("block_1_ffn_outproj_strides_0"), val = tensor([1])]; + string block_1_ffn_outproj_pad_type_0 = const()[name = string("block_1_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_1_ffn_outproj_pad_0 = const()[name = string("block_1_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_1_ffn_outproj_dilations_0 = const()[name = string("block_1_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_1_ffn_outproj_groups_0 = const()[name = string("block_1_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_1_ffn_outproj = conv(dilations = block_1_ffn_outproj_dilations_0, groups = block_1_ffn_outproj_groups_0, pad = block_1_ffn_outproj_pad_0, pad_type = block_1_ffn_outproj_pad_type_0, strides = block_1_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_4, x = block_1_ffn_x_gated)[name = string("block_1_ffn_outproj")]; + tensor block_1_residual_2 = add(x = block_1_ffn_outproj, y = block_1_residual_1)[name = string("block_1_residual_2")]; + tensor block_2_attention_rmsnorm_abs = abs(x = block_1_residual_2)[name = string("block_2_attention_rmsnorm_abs")]; + tensor block_2_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_2_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_2_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_2_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_2_attention_rmsnorm_maxval = reduce_max(axes = block_2_attention_rmsnorm_maxval_axes_0, keep_dims = block_2_attention_rmsnorm_maxval_keep_dims_0, x = block_2_attention_rmsnorm_abs)[name = string("block_2_attention_rmsnorm_maxval")]; + fp16 block_2_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_2_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_2_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_2_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_2_attention_rmsnorm_maxval_clipped = clip(alpha = block_2_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_2_attention_rmsnorm_maxval_clipped_beta_0, x = block_2_attention_rmsnorm_maxval)[name = string("block_2_attention_rmsnorm_maxval_clipped")]; + tensor block_2_attention_rmsnorm_scaled = real_div(x = block_1_residual_2, y = block_2_attention_rmsnorm_maxval_clipped)[name = string("block_2_attention_rmsnorm_scaled")]; + tensor block_2_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_2_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_2_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_2_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_2_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_2_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_2_attention_rmsnorm_squared_sum_keep_dims_0, x = block_2_attention_rmsnorm_scaled)[name = string("block_2_attention_rmsnorm_squared_sum")]; + fp16 block_2_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_2_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_2_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_2_attention_rmsnorm_rsqrt_epsilon_0, x = block_2_attention_rmsnorm_squared_sum)[name = string("block_2_attention_rmsnorm_rsqrt")]; + fp16 block_2_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_2_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_2_attention_rmsnorm_dim_scaled = mul(x = block_2_attention_rmsnorm_scaled, y = block_2_attention_rmsnorm_dim_scaled_y_0)[name = string("block_2_attention_rmsnorm_dim_scaled")]; + tensor block_2_attention_rmsnorm_normalized = mul(x = block_2_attention_rmsnorm_dim_scaled, y = block_2_attention_rmsnorm_rsqrt)[name = string("block_2_attention_rmsnorm_normalized")]; + tensor block_2_attention_rmsnorm_y_0 = const()[name = string("block_2_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314370240)))]; + tensor block_2_attention_rmsnorm = mul(x = block_2_attention_rmsnorm_normalized, y = block_2_attention_rmsnorm_y_0)[name = string("block_2_attention_rmsnorm")]; + tensor attention_2_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314372096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315146304))))[name = string("attention_2_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_5 = constexpr_blockwise_shift_scale(data = attention_2_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315183232))))[name = string("constexpr_blockwise_shift_scale_5")]; + tensor attention_2_qkvproj_bias_0 = const()[name = string("attention_2_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315185600)))]; + tensor attention_2_qkvproj_strides_0 = const()[name = string("attention_2_qkvproj_strides_0"), val = tensor([1])]; + string attention_2_qkvproj_pad_type_0 = const()[name = string("attention_2_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_2_qkvproj_pad_0 = const()[name = string("attention_2_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_2_qkvproj_dilations_0 = const()[name = string("attention_2_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_2_qkvproj_groups_0 = const()[name = string("attention_2_qkvproj_groups_0"), val = int32(1)]; + tensor attention_2_qkvproj = conv(bias = attention_2_qkvproj_bias_0, dilations = attention_2_qkvproj_dilations_0, groups = attention_2_qkvproj_groups_0, pad = attention_2_qkvproj_pad_0, pad_type = attention_2_qkvproj_pad_type_0, strides = attention_2_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_5, x = block_2_attention_rmsnorm)[name = string("attention_2_qkvproj")]; + tensor attention_2_head_reshape_shape_0 = const()[name = string("attention_2_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_2_head_reshape = reshape(shape = attention_2_head_reshape_shape_0, x = attention_2_qkvproj)[name = string("attention_2_head_reshape")]; + tensor attention_2_head_transpose_perm_0 = const()[name = string("attention_2_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_2_split_qkv_heads_axis_0 = const()[name = string("attention_2_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_2_split_qkv_heads_split_sizes_0 = const()[name = string("attention_2_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_2_head_transpose = transpose(perm = attention_2_head_transpose_perm_0, x = attention_2_head_reshape)[name = string("transpose_44")]; + tensor attention_2_split_qkv_heads_0, tensor attention_2_split_qkv_heads_1, tensor attention_2_split_qkv_heads_2 = split(axis = attention_2_split_qkv_heads_axis_0, split_sizes = attention_2_split_qkv_heads_split_sizes_0, x = attention_2_head_transpose)[name = string("attention_2_split_qkv_heads")]; + tensor attention_2_q_rope_lhs_mult = mul(x = attention_2_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_2_q_rope_lhs_mult")]; + int32 attention_2_q_rotate_half_split_num_splits_0 = const()[name = string("attention_2_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_2_q_rotate_half_split_axis_0 = const()[name = string("attention_2_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_2_q_rotate_half_split_0, tensor attention_2_q_rotate_half_split_1 = split(axis = attention_2_q_rotate_half_split_axis_0, num_splits = attention_2_q_rotate_half_split_num_splits_0, x = attention_2_split_qkv_heads_0)[name = string("attention_2_q_rotate_half_split")]; + fp16 attention_2_q_rotate_half_neg_y_0 = const()[name = string("attention_2_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_2_q_rotate_half_neg = mul(x = attention_2_q_rotate_half_split_1, y = attention_2_q_rotate_half_neg_y_0)[name = string("attention_2_q_rotate_half_neg")]; + int32 attention_2_q_rotate_half_concat_axis_0 = const()[name = string("attention_2_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_2_q_rotate_half_concat_interleave_0 = const()[name = string("attention_2_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_2_q_rotate_half_concat = concat(axis = attention_2_q_rotate_half_concat_axis_0, interleave = attention_2_q_rotate_half_concat_interleave_0, values = (attention_2_q_rotate_half_neg, attention_2_q_rotate_half_split_0))[name = string("attention_2_q_rotate_half_concat")]; + tensor attention_2_q_rope_rhs_mult = mul(x = attention_2_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_2_q_rope_rhs_mult")]; + tensor attention_2_q_rope = add(x = attention_2_q_rope_lhs_mult, y = attention_2_q_rope_rhs_mult)[name = string("attention_2_q_rope")]; + tensor attention_2_k_rope_lhs_mult = mul(x = attention_2_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_2_k_rope_lhs_mult")]; + int32 attention_2_k_rotate_half_split_num_splits_0 = const()[name = string("attention_2_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_2_k_rotate_half_split_axis_0 = const()[name = string("attention_2_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_2_k_rotate_half_split_0, tensor attention_2_k_rotate_half_split_1 = split(axis = attention_2_k_rotate_half_split_axis_0, num_splits = attention_2_k_rotate_half_split_num_splits_0, x = attention_2_split_qkv_heads_1)[name = string("attention_2_k_rotate_half_split")]; + fp16 attention_2_k_rotate_half_neg_y_0 = const()[name = string("attention_2_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_2_k_rotate_half_neg = mul(x = attention_2_k_rotate_half_split_1, y = attention_2_k_rotate_half_neg_y_0)[name = string("attention_2_k_rotate_half_neg")]; + int32 attention_2_k_rotate_half_concat_axis_0 = const()[name = string("attention_2_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_2_k_rotate_half_concat_interleave_0 = const()[name = string("attention_2_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_2_k_rotate_half_concat = concat(axis = attention_2_k_rotate_half_concat_axis_0, interleave = attention_2_k_rotate_half_concat_interleave_0, values = (attention_2_k_rotate_half_neg, attention_2_k_rotate_half_split_0))[name = string("attention_2_k_rotate_half_concat")]; + tensor attention_2_k_rope_rhs_mult = mul(x = attention_2_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_2_k_rope_rhs_mult")]; + tensor attention_2_k_rope = add(x = attention_2_k_rope_lhs_mult, y = attention_2_k_rope_rhs_mult)[name = string("attention_2_k_rope")]; + int32 attention_2_q_splits_axis_0 = const()[name = string("attention_2_q_splits_axis_0"), val = int32(1)]; + int32 attention_2_q_splits_num_splits_0 = const()[name = string("attention_2_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_2_q_splits_0, tensor attention_2_q_splits_1 = split(axis = attention_2_q_splits_axis_0, num_splits = attention_2_q_splits_num_splits_0, x = attention_2_q_rope)[name = string("attention_2_q_splits")]; + tensor attention_2_update_begin_0_values0_0 = const()[name = string("attention_2_update_begin_0_values0_0"), val = tensor([2])]; + tensor attention_2_update_begin_0_values1_0 = const()[name = string("attention_2_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_2_update_begin_0_values3_0 = const()[name = string("attention_2_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_2_update_begin_0_axis_0 = const()[name = string("attention_2_update_begin_0_axis_0"), val = int32(0)]; + bool attention_2_update_begin_0_interleave_0 = const()[name = string("attention_2_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_2_update_begin_0 = concat(axis = attention_2_update_begin_0_axis_0, interleave = attention_2_update_begin_0_interleave_0, values = (attention_2_update_begin_0_values0_0, attention_2_update_begin_0_values1_0, query_pos1, attention_2_update_begin_0_values3_0))[name = string("attention_2_update_begin_0")]; + tensor attention_2_update_end_0_values0_0 = const()[name = string("attention_2_update_end_0_values0_0"), val = tensor([3])]; + tensor attention_2_update_end_0_values1_0 = const()[name = string("attention_2_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_2_update_end_0_values3_0 = const()[name = string("attention_2_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_2_update_end_0_axis_0 = const()[name = string("attention_2_update_end_0_axis_0"), val = int32(0)]; + bool attention_2_update_end_0_interleave_0 = const()[name = string("attention_2_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_2_update_end_0 = concat(axis = attention_2_update_end_0_axis_0, interleave = attention_2_update_end_0_interleave_0, values = (attention_2_update_end_0_values0_0, attention_2_update_end_0_values1_0, end_pos_0, attention_2_update_end_0_values3_0))[name = string("attention_2_update_end_0")]; + tensor attention_2_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_2_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_2_updated_key_cache_0 = slice_update(begin = attention_2_update_begin_0, end = attention_2_update_end_0, squeeze_mask = attention_2_updated_key_cache_0_squeeze_mask_0, update = attention_2_k_rope, x = coreml_update_state_2)[name = string("attention_2_updated_key_cache_0")]; + write_state(data = attention_2_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_4_write_state")]; + tensor coreml_update_state_4 = read_state(input = key_cache_state)[name = string("coreml_update_state_4")]; + tensor attention_2_key_cache_begin_0 = const()[name = string("attention_2_key_cache_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor attention_2_key_cache_end_0 = const()[name = string("attention_2_key_cache_end_0"), val = tensor([3, 2, 512, 64])]; + tensor attention_2_key_cache_squeeze_mask_0 = const()[name = string("attention_2_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_2_key_cache = slice_by_index(begin = attention_2_key_cache_begin_0, end = attention_2_key_cache_end_0, squeeze_mask = attention_2_key_cache_squeeze_mask_0, x = coreml_update_state_4)[name = string("attention_2_key_cache")]; + int32 attention_2_key_cache_head_axis_0 = const()[name = string("attention_2_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_2_key_cache_head_num_splits_0 = const()[name = string("attention_2_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_2_key_cache_head_0, tensor attention_2_key_cache_head_1 = split(axis = attention_2_key_cache_head_axis_0, num_splits = attention_2_key_cache_head_num_splits_0, x = attention_2_key_cache)[name = string("attention_2_key_cache_head")]; + tensor attention_2_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_2_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_2_updated_value_cache_0 = slice_update(begin = attention_2_update_begin_0, end = attention_2_update_end_0, squeeze_mask = attention_2_updated_value_cache_0_squeeze_mask_0, update = attention_2_split_qkv_heads_2, x = coreml_update_state_3)[name = string("attention_2_updated_value_cache_0")]; + write_state(data = attention_2_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_5_write_state")]; + tensor coreml_update_state_5 = read_state(input = value_cache_state)[name = string("coreml_update_state_5")]; + tensor attention_2_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_2_slice_current_layer_value_cache_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor attention_2_slice_current_layer_value_cache_end_0 = const()[name = string("attention_2_slice_current_layer_value_cache_end_0"), val = tensor([3, 2, 512, 64])]; + tensor attention_2_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_2_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_2_slice_current_layer_value_cache = slice_by_index(begin = attention_2_slice_current_layer_value_cache_begin_0, end = attention_2_slice_current_layer_value_cache_end_0, squeeze_mask = attention_2_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_5)[name = string("attention_2_slice_current_layer_value_cache")]; + int32 attention_2_slice_value_cache_heads_axis_0 = const()[name = string("attention_2_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_2_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_2_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_2_slice_value_cache_heads_0, tensor attention_2_slice_value_cache_heads_1 = split(axis = attention_2_slice_value_cache_heads_axis_0, num_splits = attention_2_slice_value_cache_heads_num_splits_0, x = attention_2_slice_current_layer_value_cache)[name = string("attention_2_slice_value_cache_heads")]; + bool attention_2_scores_0_transpose_y_0 = const()[name = string("attention_2_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_2_scores_0_transpose_x_0 = const()[name = string("attention_2_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_2_scores_0 = matmul(transpose_x = attention_2_scores_0_transpose_x_0, transpose_y = attention_2_scores_0_transpose_y_0, x = attention_2_key_cache_head_0, y = attention_2_q_splits_0)[name = string("attention_2_scores_0")]; + fp16 attention_2_scaled_scores_0_y_0 = const()[name = string("attention_2_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_2_scaled_scores_0 = mul(x = attention_2_scores_0, y = attention_2_scaled_scores_0_y_0)[name = string("attention_2_scaled_scores_0")]; + tensor attention_2_masked_scaled_scores_0 = add(x = attention_2_scaled_scores_0, y = transpose_0)[name = string("attention_2_masked_scaled_scores_0")]; + int32 softmax_4_axis_0 = const()[name = string("softmax_4_axis_0"), val = int32(-2)]; + tensor softmax_4 = softmax(axis = softmax_4_axis_0, x = attention_2_masked_scaled_scores_0)[name = string("softmax_4")]; + bool attention_2_attention_0_transpose_x_0 = const()[name = string("attention_2_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_2_attention_0_transpose_y_0 = const()[name = string("attention_2_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_2_attention_0 = matmul(transpose_x = attention_2_attention_0_transpose_x_0, transpose_y = attention_2_attention_0_transpose_y_0, x = softmax_4, y = attention_2_slice_value_cache_heads_0)[name = string("attention_2_attention_0")]; + bool attention_2_scores_1_transpose_y_0 = const()[name = string("attention_2_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_2_scores_1_transpose_x_0 = const()[name = string("attention_2_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_2_scores_1 = matmul(transpose_x = attention_2_scores_1_transpose_x_0, transpose_y = attention_2_scores_1_transpose_y_0, x = attention_2_key_cache_head_1, y = attention_2_q_splits_1)[name = string("attention_2_scores_1")]; + fp16 attention_2_scaled_scores_1_y_0 = const()[name = string("attention_2_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_2_scaled_scores_1 = mul(x = attention_2_scores_1, y = attention_2_scaled_scores_1_y_0)[name = string("attention_2_scaled_scores_1")]; + tensor attention_2_masked_scaled_scores_1 = add(x = attention_2_scaled_scores_1, y = transpose_0)[name = string("attention_2_masked_scaled_scores_1")]; + int32 softmax_5_axis_0 = const()[name = string("softmax_5_axis_0"), val = int32(-2)]; + tensor softmax_5 = softmax(axis = softmax_5_axis_0, x = attention_2_masked_scaled_scores_1)[name = string("softmax_5")]; + bool attention_2_attention_1_transpose_x_0 = const()[name = string("attention_2_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_2_attention_1_transpose_y_0 = const()[name = string("attention_2_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_2_attention_1 = matmul(transpose_x = attention_2_attention_1_transpose_x_0, transpose_y = attention_2_attention_1_transpose_y_0, x = softmax_5, y = attention_2_slice_value_cache_heads_1)[name = string("attention_2_attention_1")]; + int32 attention_2_concat_attention_all_heads_axis_0 = const()[name = string("attention_2_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_2_concat_attention_all_heads_interleave_0 = const()[name = string("attention_2_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_2_concat_attention_all_heads = concat(axis = attention_2_concat_attention_all_heads_axis_0, interleave = attention_2_concat_attention_all_heads_interleave_0, values = (attention_2_attention_0, attention_2_attention_1))[name = string("attention_2_concat_attention_all_heads")]; + tensor attention_2_channels_first_retransposed_perm_0 = const()[name = string("attention_2_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_2_reshaped_shape_0 = const()[name = string("attention_2_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_2_channels_first_retransposed = transpose(perm = attention_2_channels_first_retransposed_perm_0, x = attention_2_concat_attention_all_heads)[name = string("transpose_43")]; + tensor attention_2_reshaped = reshape(shape = attention_2_reshaped_shape_0, x = attention_2_channels_first_retransposed)[name = string("attention_2_reshaped")]; + tensor attention_2_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315187968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315790144))))[name = string("attention_2_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_6 = constexpr_blockwise_shift_scale(data = attention_2_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315818880))))[name = string("constexpr_blockwise_shift_scale_6")]; + tensor attention_2_outproj_strides_0 = const()[name = string("attention_2_outproj_strides_0"), val = tensor([1])]; + string attention_2_outproj_pad_type_0 = const()[name = string("attention_2_outproj_pad_type_0"), val = string("valid")]; + tensor attention_2_outproj_pad_0 = const()[name = string("attention_2_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_2_outproj_dilations_0 = const()[name = string("attention_2_outproj_dilations_0"), val = tensor([1])]; + int32 attention_2_outproj_groups_0 = const()[name = string("attention_2_outproj_groups_0"), val = int32(1)]; + tensor attention_2_outproj = conv(dilations = attention_2_outproj_dilations_0, groups = attention_2_outproj_groups_0, pad = attention_2_outproj_pad_0, pad_type = attention_2_outproj_pad_type_0, strides = attention_2_outproj_strides_0, weight = constexpr_blockwise_shift_scale_6, x = attention_2_reshaped)[name = string("attention_2_outproj")]; + tensor block_2_residual_1 = add(x = block_1_residual_2, y = attention_2_outproj)[name = string("block_2_residual_1")]; + tensor block_2_ffn_rmsnorm_abs = abs(x = block_2_residual_1)[name = string("block_2_ffn_rmsnorm_abs")]; + tensor block_2_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_2_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_2_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_2_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_2_ffn_rmsnorm_maxval = reduce_max(axes = block_2_ffn_rmsnorm_maxval_axes_0, keep_dims = block_2_ffn_rmsnorm_maxval_keep_dims_0, x = block_2_ffn_rmsnorm_abs)[name = string("block_2_ffn_rmsnorm_maxval")]; + fp16 block_2_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_2_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_2_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_2_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_2_ffn_rmsnorm_maxval_clipped = clip(alpha = block_2_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_2_ffn_rmsnorm_maxval_clipped_beta_0, x = block_2_ffn_rmsnorm_maxval)[name = string("block_2_ffn_rmsnorm_maxval_clipped")]; + tensor block_2_ffn_rmsnorm_scaled = real_div(x = block_2_residual_1, y = block_2_ffn_rmsnorm_maxval_clipped)[name = string("block_2_ffn_rmsnorm_scaled")]; + tensor block_2_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_2_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_2_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_2_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_2_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_2_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_2_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_2_ffn_rmsnorm_scaled)[name = string("block_2_ffn_rmsnorm_squared_sum")]; + fp16 block_2_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_2_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_2_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_2_ffn_rmsnorm_rsqrt_epsilon_0, x = block_2_ffn_rmsnorm_squared_sum)[name = string("block_2_ffn_rmsnorm_rsqrt")]; + fp16 block_2_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_2_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_2_ffn_rmsnorm_dim_scaled = mul(x = block_2_ffn_rmsnorm_scaled, y = block_2_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_2_ffn_rmsnorm_dim_scaled")]; + tensor block_2_ffn_rmsnorm_normalized = mul(x = block_2_ffn_rmsnorm_dim_scaled, y = block_2_ffn_rmsnorm_rsqrt)[name = string("block_2_ffn_rmsnorm_normalized")]; + tensor block_2_ffn_rmsnorm_y_0 = const()[name = string("block_2_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315820736)))]; + tensor block_2_ffn_rmsnorm = mul(x = block_2_ffn_rmsnorm_normalized, y = block_2_ffn_rmsnorm_y_0)[name = string("block_2_ffn_rmsnorm")]; + tensor block_2_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315822592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319091264))))[name = string("block_2_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_7 = constexpr_blockwise_shift_scale(data = block_2_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319246976))))[name = string("constexpr_blockwise_shift_scale_7")]; + tensor block_2_ffn_inproj_strides_0 = const()[name = string("block_2_ffn_inproj_strides_0"), val = tensor([1])]; + string block_2_ffn_inproj_pad_type_0 = const()[name = string("block_2_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_2_ffn_inproj_pad_0 = const()[name = string("block_2_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_2_ffn_inproj_dilations_0 = const()[name = string("block_2_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_2_ffn_inproj_groups_0 = const()[name = string("block_2_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_2_ffn_inproj = conv(dilations = block_2_ffn_inproj_dilations_0, groups = block_2_ffn_inproj_groups_0, pad = block_2_ffn_inproj_pad_0, pad_type = block_2_ffn_inproj_pad_type_0, strides = block_2_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_7, x = block_2_ffn_rmsnorm)[name = string("block_2_ffn_inproj")]; + tensor block_2_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319256768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322525440))))[name = string("block_2_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_8 = constexpr_blockwise_shift_scale(data = block_2_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322681152))))[name = string("constexpr_blockwise_shift_scale_8")]; + tensor block_2_ffn_g_strides_0 = const()[name = string("block_2_ffn_g_strides_0"), val = tensor([1])]; + string block_2_ffn_g_pad_type_0 = const()[name = string("block_2_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_2_ffn_g_pad_0 = const()[name = string("block_2_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_2_ffn_g_dilations_0 = const()[name = string("block_2_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_2_ffn_g_groups_0 = const()[name = string("block_2_ffn_g_groups_0"), val = int32(1)]; + tensor block_2_ffn_g = conv(dilations = block_2_ffn_g_dilations_0, groups = block_2_ffn_g_groups_0, pad = block_2_ffn_g_pad_0, pad_type = block_2_ffn_g_pad_type_0, strides = block_2_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_8, x = block_2_ffn_rmsnorm)[name = string("block_2_ffn_g")]; + tensor block_2_ffn_g_activation = silu(x = block_2_ffn_g)[name = string("block_2_ffn_g_activation")]; + tensor block_2_ffn_x_gated = mul(x = block_2_ffn_inproj, y = block_2_ffn_g_activation)[name = string("block_2_ffn_x_gated")]; + tensor block_2_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322690944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325959616))))[name = string("block_2_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_9 = constexpr_blockwise_shift_scale(data = block_2_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325988352))))[name = string("constexpr_blockwise_shift_scale_9")]; + tensor block_2_ffn_outproj_strides_0 = const()[name = string("block_2_ffn_outproj_strides_0"), val = tensor([1])]; + string block_2_ffn_outproj_pad_type_0 = const()[name = string("block_2_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_2_ffn_outproj_pad_0 = const()[name = string("block_2_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_2_ffn_outproj_dilations_0 = const()[name = string("block_2_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_2_ffn_outproj_groups_0 = const()[name = string("block_2_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_2_ffn_outproj = conv(dilations = block_2_ffn_outproj_dilations_0, groups = block_2_ffn_outproj_groups_0, pad = block_2_ffn_outproj_pad_0, pad_type = block_2_ffn_outproj_pad_type_0, strides = block_2_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_9, x = block_2_ffn_x_gated)[name = string("block_2_ffn_outproj")]; + tensor block_2_residual_2 = add(x = block_2_ffn_outproj, y = block_2_residual_1)[name = string("block_2_residual_2")]; + tensor block_3_attention_rmsnorm_abs = abs(x = block_2_residual_2)[name = string("block_3_attention_rmsnorm_abs")]; + tensor block_3_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_3_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_3_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_3_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_3_attention_rmsnorm_maxval = reduce_max(axes = block_3_attention_rmsnorm_maxval_axes_0, keep_dims = block_3_attention_rmsnorm_maxval_keep_dims_0, x = block_3_attention_rmsnorm_abs)[name = string("block_3_attention_rmsnorm_maxval")]; + fp16 block_3_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_3_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_3_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_3_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_3_attention_rmsnorm_maxval_clipped = clip(alpha = block_3_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_3_attention_rmsnorm_maxval_clipped_beta_0, x = block_3_attention_rmsnorm_maxval)[name = string("block_3_attention_rmsnorm_maxval_clipped")]; + tensor block_3_attention_rmsnorm_scaled = real_div(x = block_2_residual_2, y = block_3_attention_rmsnorm_maxval_clipped)[name = string("block_3_attention_rmsnorm_scaled")]; + tensor block_3_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_3_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_3_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_3_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_3_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_3_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_3_attention_rmsnorm_squared_sum_keep_dims_0, x = block_3_attention_rmsnorm_scaled)[name = string("block_3_attention_rmsnorm_squared_sum")]; + fp16 block_3_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_3_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_3_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_3_attention_rmsnorm_rsqrt_epsilon_0, x = block_3_attention_rmsnorm_squared_sum)[name = string("block_3_attention_rmsnorm_rsqrt")]; + fp16 block_3_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_3_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_3_attention_rmsnorm_dim_scaled = mul(x = block_3_attention_rmsnorm_scaled, y = block_3_attention_rmsnorm_dim_scaled_y_0)[name = string("block_3_attention_rmsnorm_dim_scaled")]; + tensor block_3_attention_rmsnorm_normalized = mul(x = block_3_attention_rmsnorm_dim_scaled, y = block_3_attention_rmsnorm_rsqrt)[name = string("block_3_attention_rmsnorm_normalized")]; + tensor block_3_attention_rmsnorm_y_0 = const()[name = string("block_3_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325990208)))]; + tensor block_3_attention_rmsnorm = mul(x = block_3_attention_rmsnorm_normalized, y = block_3_attention_rmsnorm_y_0)[name = string("block_3_attention_rmsnorm")]; + tensor attention_3_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325992064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326766272))))[name = string("attention_3_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_10 = constexpr_blockwise_shift_scale(data = attention_3_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326803200))))[name = string("constexpr_blockwise_shift_scale_10")]; + tensor attention_3_qkvproj_bias_0 = const()[name = string("attention_3_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326805568)))]; + tensor attention_3_qkvproj_strides_0 = const()[name = string("attention_3_qkvproj_strides_0"), val = tensor([1])]; + string attention_3_qkvproj_pad_type_0 = const()[name = string("attention_3_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_3_qkvproj_pad_0 = const()[name = string("attention_3_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_3_qkvproj_dilations_0 = const()[name = string("attention_3_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_3_qkvproj_groups_0 = const()[name = string("attention_3_qkvproj_groups_0"), val = int32(1)]; + tensor attention_3_qkvproj = conv(bias = attention_3_qkvproj_bias_0, dilations = attention_3_qkvproj_dilations_0, groups = attention_3_qkvproj_groups_0, pad = attention_3_qkvproj_pad_0, pad_type = attention_3_qkvproj_pad_type_0, strides = attention_3_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_10, x = block_3_attention_rmsnorm)[name = string("attention_3_qkvproj")]; + tensor attention_3_head_reshape_shape_0 = const()[name = string("attention_3_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_3_head_reshape = reshape(shape = attention_3_head_reshape_shape_0, x = attention_3_qkvproj)[name = string("attention_3_head_reshape")]; + tensor attention_3_head_transpose_perm_0 = const()[name = string("attention_3_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_3_split_qkv_heads_axis_0 = const()[name = string("attention_3_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_3_split_qkv_heads_split_sizes_0 = const()[name = string("attention_3_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_3_head_transpose = transpose(perm = attention_3_head_transpose_perm_0, x = attention_3_head_reshape)[name = string("transpose_42")]; + tensor attention_3_split_qkv_heads_0, tensor attention_3_split_qkv_heads_1, tensor attention_3_split_qkv_heads_2 = split(axis = attention_3_split_qkv_heads_axis_0, split_sizes = attention_3_split_qkv_heads_split_sizes_0, x = attention_3_head_transpose)[name = string("attention_3_split_qkv_heads")]; + tensor attention_3_q_rope_lhs_mult = mul(x = attention_3_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_3_q_rope_lhs_mult")]; + int32 attention_3_q_rotate_half_split_num_splits_0 = const()[name = string("attention_3_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_3_q_rotate_half_split_axis_0 = const()[name = string("attention_3_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_3_q_rotate_half_split_0, tensor attention_3_q_rotate_half_split_1 = split(axis = attention_3_q_rotate_half_split_axis_0, num_splits = attention_3_q_rotate_half_split_num_splits_0, x = attention_3_split_qkv_heads_0)[name = string("attention_3_q_rotate_half_split")]; + fp16 attention_3_q_rotate_half_neg_y_0 = const()[name = string("attention_3_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_3_q_rotate_half_neg = mul(x = attention_3_q_rotate_half_split_1, y = attention_3_q_rotate_half_neg_y_0)[name = string("attention_3_q_rotate_half_neg")]; + int32 attention_3_q_rotate_half_concat_axis_0 = const()[name = string("attention_3_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_3_q_rotate_half_concat_interleave_0 = const()[name = string("attention_3_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_3_q_rotate_half_concat = concat(axis = attention_3_q_rotate_half_concat_axis_0, interleave = attention_3_q_rotate_half_concat_interleave_0, values = (attention_3_q_rotate_half_neg, attention_3_q_rotate_half_split_0))[name = string("attention_3_q_rotate_half_concat")]; + tensor attention_3_q_rope_rhs_mult = mul(x = attention_3_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_3_q_rope_rhs_mult")]; + tensor attention_3_q_rope = add(x = attention_3_q_rope_lhs_mult, y = attention_3_q_rope_rhs_mult)[name = string("attention_3_q_rope")]; + tensor attention_3_k_rope_lhs_mult = mul(x = attention_3_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_3_k_rope_lhs_mult")]; + int32 attention_3_k_rotate_half_split_num_splits_0 = const()[name = string("attention_3_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_3_k_rotate_half_split_axis_0 = const()[name = string("attention_3_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_3_k_rotate_half_split_0, tensor attention_3_k_rotate_half_split_1 = split(axis = attention_3_k_rotate_half_split_axis_0, num_splits = attention_3_k_rotate_half_split_num_splits_0, x = attention_3_split_qkv_heads_1)[name = string("attention_3_k_rotate_half_split")]; + fp16 attention_3_k_rotate_half_neg_y_0 = const()[name = string("attention_3_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_3_k_rotate_half_neg = mul(x = attention_3_k_rotate_half_split_1, y = attention_3_k_rotate_half_neg_y_0)[name = string("attention_3_k_rotate_half_neg")]; + int32 attention_3_k_rotate_half_concat_axis_0 = const()[name = string("attention_3_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_3_k_rotate_half_concat_interleave_0 = const()[name = string("attention_3_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_3_k_rotate_half_concat = concat(axis = attention_3_k_rotate_half_concat_axis_0, interleave = attention_3_k_rotate_half_concat_interleave_0, values = (attention_3_k_rotate_half_neg, attention_3_k_rotate_half_split_0))[name = string("attention_3_k_rotate_half_concat")]; + tensor attention_3_k_rope_rhs_mult = mul(x = attention_3_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_3_k_rope_rhs_mult")]; + tensor attention_3_k_rope = add(x = attention_3_k_rope_lhs_mult, y = attention_3_k_rope_rhs_mult)[name = string("attention_3_k_rope")]; + int32 attention_3_q_splits_axis_0 = const()[name = string("attention_3_q_splits_axis_0"), val = int32(1)]; + int32 attention_3_q_splits_num_splits_0 = const()[name = string("attention_3_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_3_q_splits_0, tensor attention_3_q_splits_1 = split(axis = attention_3_q_splits_axis_0, num_splits = attention_3_q_splits_num_splits_0, x = attention_3_q_rope)[name = string("attention_3_q_splits")]; + tensor attention_3_update_begin_0_values0_0 = const()[name = string("attention_3_update_begin_0_values0_0"), val = tensor([3])]; + tensor attention_3_update_begin_0_values1_0 = const()[name = string("attention_3_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_3_update_begin_0_values3_0 = const()[name = string("attention_3_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_3_update_begin_0_axis_0 = const()[name = string("attention_3_update_begin_0_axis_0"), val = int32(0)]; + bool attention_3_update_begin_0_interleave_0 = const()[name = string("attention_3_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_3_update_begin_0 = concat(axis = attention_3_update_begin_0_axis_0, interleave = attention_3_update_begin_0_interleave_0, values = (attention_3_update_begin_0_values0_0, attention_3_update_begin_0_values1_0, query_pos1, attention_3_update_begin_0_values3_0))[name = string("attention_3_update_begin_0")]; + tensor attention_3_update_end_0_values0_0 = const()[name = string("attention_3_update_end_0_values0_0"), val = tensor([4])]; + tensor attention_3_update_end_0_values1_0 = const()[name = string("attention_3_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_3_update_end_0_values3_0 = const()[name = string("attention_3_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_3_update_end_0_axis_0 = const()[name = string("attention_3_update_end_0_axis_0"), val = int32(0)]; + bool attention_3_update_end_0_interleave_0 = const()[name = string("attention_3_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_3_update_end_0 = concat(axis = attention_3_update_end_0_axis_0, interleave = attention_3_update_end_0_interleave_0, values = (attention_3_update_end_0_values0_0, attention_3_update_end_0_values1_0, end_pos_0, attention_3_update_end_0_values3_0))[name = string("attention_3_update_end_0")]; + tensor attention_3_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_3_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_3_updated_key_cache_0 = slice_update(begin = attention_3_update_begin_0, end = attention_3_update_end_0, squeeze_mask = attention_3_updated_key_cache_0_squeeze_mask_0, update = attention_3_k_rope, x = coreml_update_state_4)[name = string("attention_3_updated_key_cache_0")]; + write_state(data = attention_3_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_6_write_state")]; + tensor coreml_update_state_6 = read_state(input = key_cache_state)[name = string("coreml_update_state_6")]; + tensor attention_3_key_cache_begin_0 = const()[name = string("attention_3_key_cache_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor attention_3_key_cache_end_0 = const()[name = string("attention_3_key_cache_end_0"), val = tensor([4, 2, 512, 64])]; + tensor attention_3_key_cache_squeeze_mask_0 = const()[name = string("attention_3_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_3_key_cache = slice_by_index(begin = attention_3_key_cache_begin_0, end = attention_3_key_cache_end_0, squeeze_mask = attention_3_key_cache_squeeze_mask_0, x = coreml_update_state_6)[name = string("attention_3_key_cache")]; + int32 attention_3_key_cache_head_axis_0 = const()[name = string("attention_3_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_3_key_cache_head_num_splits_0 = const()[name = string("attention_3_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_3_key_cache_head_0, tensor attention_3_key_cache_head_1 = split(axis = attention_3_key_cache_head_axis_0, num_splits = attention_3_key_cache_head_num_splits_0, x = attention_3_key_cache)[name = string("attention_3_key_cache_head")]; + tensor attention_3_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_3_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_3_updated_value_cache_0 = slice_update(begin = attention_3_update_begin_0, end = attention_3_update_end_0, squeeze_mask = attention_3_updated_value_cache_0_squeeze_mask_0, update = attention_3_split_qkv_heads_2, x = coreml_update_state_5)[name = string("attention_3_updated_value_cache_0")]; + write_state(data = attention_3_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_7_write_state")]; + tensor coreml_update_state_7 = read_state(input = value_cache_state)[name = string("coreml_update_state_7")]; + tensor attention_3_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_3_slice_current_layer_value_cache_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor attention_3_slice_current_layer_value_cache_end_0 = const()[name = string("attention_3_slice_current_layer_value_cache_end_0"), val = tensor([4, 2, 512, 64])]; + tensor attention_3_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_3_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_3_slice_current_layer_value_cache = slice_by_index(begin = attention_3_slice_current_layer_value_cache_begin_0, end = attention_3_slice_current_layer_value_cache_end_0, squeeze_mask = attention_3_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_7)[name = string("attention_3_slice_current_layer_value_cache")]; + int32 attention_3_slice_value_cache_heads_axis_0 = const()[name = string("attention_3_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_3_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_3_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_3_slice_value_cache_heads_0, tensor attention_3_slice_value_cache_heads_1 = split(axis = attention_3_slice_value_cache_heads_axis_0, num_splits = attention_3_slice_value_cache_heads_num_splits_0, x = attention_3_slice_current_layer_value_cache)[name = string("attention_3_slice_value_cache_heads")]; + bool attention_3_scores_0_transpose_y_0 = const()[name = string("attention_3_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_3_scores_0_transpose_x_0 = const()[name = string("attention_3_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_3_scores_0 = matmul(transpose_x = attention_3_scores_0_transpose_x_0, transpose_y = attention_3_scores_0_transpose_y_0, x = attention_3_key_cache_head_0, y = attention_3_q_splits_0)[name = string("attention_3_scores_0")]; + fp16 attention_3_scaled_scores_0_y_0 = const()[name = string("attention_3_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_3_scaled_scores_0 = mul(x = attention_3_scores_0, y = attention_3_scaled_scores_0_y_0)[name = string("attention_3_scaled_scores_0")]; + tensor attention_3_masked_scaled_scores_0 = add(x = attention_3_scaled_scores_0, y = transpose_0)[name = string("attention_3_masked_scaled_scores_0")]; + int32 softmax_6_axis_0 = const()[name = string("softmax_6_axis_0"), val = int32(-2)]; + tensor softmax_6 = softmax(axis = softmax_6_axis_0, x = attention_3_masked_scaled_scores_0)[name = string("softmax_6")]; + bool attention_3_attention_0_transpose_x_0 = const()[name = string("attention_3_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_3_attention_0_transpose_y_0 = const()[name = string("attention_3_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_3_attention_0 = matmul(transpose_x = attention_3_attention_0_transpose_x_0, transpose_y = attention_3_attention_0_transpose_y_0, x = softmax_6, y = attention_3_slice_value_cache_heads_0)[name = string("attention_3_attention_0")]; + bool attention_3_scores_1_transpose_y_0 = const()[name = string("attention_3_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_3_scores_1_transpose_x_0 = const()[name = string("attention_3_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_3_scores_1 = matmul(transpose_x = attention_3_scores_1_transpose_x_0, transpose_y = attention_3_scores_1_transpose_y_0, x = attention_3_key_cache_head_1, y = attention_3_q_splits_1)[name = string("attention_3_scores_1")]; + fp16 attention_3_scaled_scores_1_y_0 = const()[name = string("attention_3_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_3_scaled_scores_1 = mul(x = attention_3_scores_1, y = attention_3_scaled_scores_1_y_0)[name = string("attention_3_scaled_scores_1")]; + tensor attention_3_masked_scaled_scores_1 = add(x = attention_3_scaled_scores_1, y = transpose_0)[name = string("attention_3_masked_scaled_scores_1")]; + int32 softmax_7_axis_0 = const()[name = string("softmax_7_axis_0"), val = int32(-2)]; + tensor softmax_7 = softmax(axis = softmax_7_axis_0, x = attention_3_masked_scaled_scores_1)[name = string("softmax_7")]; + bool attention_3_attention_1_transpose_x_0 = const()[name = string("attention_3_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_3_attention_1_transpose_y_0 = const()[name = string("attention_3_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_3_attention_1 = matmul(transpose_x = attention_3_attention_1_transpose_x_0, transpose_y = attention_3_attention_1_transpose_y_0, x = softmax_7, y = attention_3_slice_value_cache_heads_1)[name = string("attention_3_attention_1")]; + int32 attention_3_concat_attention_all_heads_axis_0 = const()[name = string("attention_3_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_3_concat_attention_all_heads_interleave_0 = const()[name = string("attention_3_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_3_concat_attention_all_heads = concat(axis = attention_3_concat_attention_all_heads_axis_0, interleave = attention_3_concat_attention_all_heads_interleave_0, values = (attention_3_attention_0, attention_3_attention_1))[name = string("attention_3_concat_attention_all_heads")]; + tensor attention_3_channels_first_retransposed_perm_0 = const()[name = string("attention_3_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_3_reshaped_shape_0 = const()[name = string("attention_3_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_3_channels_first_retransposed = transpose(perm = attention_3_channels_first_retransposed_perm_0, x = attention_3_concat_attention_all_heads)[name = string("transpose_41")]; + tensor attention_3_reshaped = reshape(shape = attention_3_reshaped_shape_0, x = attention_3_channels_first_retransposed)[name = string("attention_3_reshaped")]; + tensor attention_3_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326807936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327410112))))[name = string("attention_3_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_11 = constexpr_blockwise_shift_scale(data = attention_3_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327438848))))[name = string("constexpr_blockwise_shift_scale_11")]; + tensor attention_3_outproj_strides_0 = const()[name = string("attention_3_outproj_strides_0"), val = tensor([1])]; + string attention_3_outproj_pad_type_0 = const()[name = string("attention_3_outproj_pad_type_0"), val = string("valid")]; + tensor attention_3_outproj_pad_0 = const()[name = string("attention_3_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_3_outproj_dilations_0 = const()[name = string("attention_3_outproj_dilations_0"), val = tensor([1])]; + int32 attention_3_outproj_groups_0 = const()[name = string("attention_3_outproj_groups_0"), val = int32(1)]; + tensor attention_3_outproj = conv(dilations = attention_3_outproj_dilations_0, groups = attention_3_outproj_groups_0, pad = attention_3_outproj_pad_0, pad_type = attention_3_outproj_pad_type_0, strides = attention_3_outproj_strides_0, weight = constexpr_blockwise_shift_scale_11, x = attention_3_reshaped)[name = string("attention_3_outproj")]; + tensor block_3_residual_1 = add(x = block_2_residual_2, y = attention_3_outproj)[name = string("block_3_residual_1")]; + tensor block_3_ffn_rmsnorm_abs = abs(x = block_3_residual_1)[name = string("block_3_ffn_rmsnorm_abs")]; + tensor block_3_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_3_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_3_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_3_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_3_ffn_rmsnorm_maxval = reduce_max(axes = block_3_ffn_rmsnorm_maxval_axes_0, keep_dims = block_3_ffn_rmsnorm_maxval_keep_dims_0, x = block_3_ffn_rmsnorm_abs)[name = string("block_3_ffn_rmsnorm_maxval")]; + fp16 block_3_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_3_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_3_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_3_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_3_ffn_rmsnorm_maxval_clipped = clip(alpha = block_3_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_3_ffn_rmsnorm_maxval_clipped_beta_0, x = block_3_ffn_rmsnorm_maxval)[name = string("block_3_ffn_rmsnorm_maxval_clipped")]; + tensor block_3_ffn_rmsnorm_scaled = real_div(x = block_3_residual_1, y = block_3_ffn_rmsnorm_maxval_clipped)[name = string("block_3_ffn_rmsnorm_scaled")]; + tensor block_3_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_3_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_3_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_3_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_3_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_3_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_3_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_3_ffn_rmsnorm_scaled)[name = string("block_3_ffn_rmsnorm_squared_sum")]; + fp16 block_3_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_3_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_3_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_3_ffn_rmsnorm_rsqrt_epsilon_0, x = block_3_ffn_rmsnorm_squared_sum)[name = string("block_3_ffn_rmsnorm_rsqrt")]; + fp16 block_3_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_3_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_3_ffn_rmsnorm_dim_scaled = mul(x = block_3_ffn_rmsnorm_scaled, y = block_3_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_3_ffn_rmsnorm_dim_scaled")]; + tensor block_3_ffn_rmsnorm_normalized = mul(x = block_3_ffn_rmsnorm_dim_scaled, y = block_3_ffn_rmsnorm_rsqrt)[name = string("block_3_ffn_rmsnorm_normalized")]; + tensor block_3_ffn_rmsnorm_y_0 = const()[name = string("block_3_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327440704)))]; + tensor block_3_ffn_rmsnorm = mul(x = block_3_ffn_rmsnorm_normalized, y = block_3_ffn_rmsnorm_y_0)[name = string("block_3_ffn_rmsnorm")]; + tensor block_3_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327442560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330711232))))[name = string("block_3_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_12 = constexpr_blockwise_shift_scale(data = block_3_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330866944))))[name = string("constexpr_blockwise_shift_scale_12")]; + tensor block_3_ffn_inproj_strides_0 = const()[name = string("block_3_ffn_inproj_strides_0"), val = tensor([1])]; + string block_3_ffn_inproj_pad_type_0 = const()[name = string("block_3_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_3_ffn_inproj_pad_0 = const()[name = string("block_3_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_3_ffn_inproj_dilations_0 = const()[name = string("block_3_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_3_ffn_inproj_groups_0 = const()[name = string("block_3_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_3_ffn_inproj = conv(dilations = block_3_ffn_inproj_dilations_0, groups = block_3_ffn_inproj_groups_0, pad = block_3_ffn_inproj_pad_0, pad_type = block_3_ffn_inproj_pad_type_0, strides = block_3_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_12, x = block_3_ffn_rmsnorm)[name = string("block_3_ffn_inproj")]; + tensor block_3_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330876736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334145408))))[name = string("block_3_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_13 = constexpr_blockwise_shift_scale(data = block_3_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334301120))))[name = string("constexpr_blockwise_shift_scale_13")]; + tensor block_3_ffn_g_strides_0 = const()[name = string("block_3_ffn_g_strides_0"), val = tensor([1])]; + string block_3_ffn_g_pad_type_0 = const()[name = string("block_3_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_3_ffn_g_pad_0 = const()[name = string("block_3_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_3_ffn_g_dilations_0 = const()[name = string("block_3_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_3_ffn_g_groups_0 = const()[name = string("block_3_ffn_g_groups_0"), val = int32(1)]; + tensor block_3_ffn_g = conv(dilations = block_3_ffn_g_dilations_0, groups = block_3_ffn_g_groups_0, pad = block_3_ffn_g_pad_0, pad_type = block_3_ffn_g_pad_type_0, strides = block_3_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_13, x = block_3_ffn_rmsnorm)[name = string("block_3_ffn_g")]; + tensor block_3_ffn_g_activation = silu(x = block_3_ffn_g)[name = string("block_3_ffn_g_activation")]; + tensor block_3_ffn_x_gated = mul(x = block_3_ffn_inproj, y = block_3_ffn_g_activation)[name = string("block_3_ffn_x_gated")]; + tensor block_3_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334310912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337579584))))[name = string("block_3_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_14 = constexpr_blockwise_shift_scale(data = block_3_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337608320))))[name = string("constexpr_blockwise_shift_scale_14")]; + tensor block_3_ffn_outproj_strides_0 = const()[name = string("block_3_ffn_outproj_strides_0"), val = tensor([1])]; + string block_3_ffn_outproj_pad_type_0 = const()[name = string("block_3_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_3_ffn_outproj_pad_0 = const()[name = string("block_3_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_3_ffn_outproj_dilations_0 = const()[name = string("block_3_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_3_ffn_outproj_groups_0 = const()[name = string("block_3_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_3_ffn_outproj = conv(dilations = block_3_ffn_outproj_dilations_0, groups = block_3_ffn_outproj_groups_0, pad = block_3_ffn_outproj_pad_0, pad_type = block_3_ffn_outproj_pad_type_0, strides = block_3_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_14, x = block_3_ffn_x_gated)[name = string("block_3_ffn_outproj")]; + tensor block_3_residual_2 = add(x = block_3_ffn_outproj, y = block_3_residual_1)[name = string("block_3_residual_2")]; + tensor block_4_attention_rmsnorm_abs = abs(x = block_3_residual_2)[name = string("block_4_attention_rmsnorm_abs")]; + tensor block_4_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_4_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_4_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_4_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_4_attention_rmsnorm_maxval = reduce_max(axes = block_4_attention_rmsnorm_maxval_axes_0, keep_dims = block_4_attention_rmsnorm_maxval_keep_dims_0, x = block_4_attention_rmsnorm_abs)[name = string("block_4_attention_rmsnorm_maxval")]; + fp16 block_4_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_4_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_4_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_4_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_4_attention_rmsnorm_maxval_clipped = clip(alpha = block_4_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_4_attention_rmsnorm_maxval_clipped_beta_0, x = block_4_attention_rmsnorm_maxval)[name = string("block_4_attention_rmsnorm_maxval_clipped")]; + tensor block_4_attention_rmsnorm_scaled = real_div(x = block_3_residual_2, y = block_4_attention_rmsnorm_maxval_clipped)[name = string("block_4_attention_rmsnorm_scaled")]; + tensor block_4_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_4_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_4_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_4_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_4_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_4_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_4_attention_rmsnorm_squared_sum_keep_dims_0, x = block_4_attention_rmsnorm_scaled)[name = string("block_4_attention_rmsnorm_squared_sum")]; + fp16 block_4_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_4_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_4_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_4_attention_rmsnorm_rsqrt_epsilon_0, x = block_4_attention_rmsnorm_squared_sum)[name = string("block_4_attention_rmsnorm_rsqrt")]; + fp16 block_4_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_4_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_4_attention_rmsnorm_dim_scaled = mul(x = block_4_attention_rmsnorm_scaled, y = block_4_attention_rmsnorm_dim_scaled_y_0)[name = string("block_4_attention_rmsnorm_dim_scaled")]; + tensor block_4_attention_rmsnorm_normalized = mul(x = block_4_attention_rmsnorm_dim_scaled, y = block_4_attention_rmsnorm_rsqrt)[name = string("block_4_attention_rmsnorm_normalized")]; + tensor block_4_attention_rmsnorm_y_0 = const()[name = string("block_4_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337610176)))]; + tensor block_4_attention_rmsnorm = mul(x = block_4_attention_rmsnorm_normalized, y = block_4_attention_rmsnorm_y_0)[name = string("block_4_attention_rmsnorm")]; + tensor attention_4_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337612032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338386240))))[name = string("attention_4_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_15 = constexpr_blockwise_shift_scale(data = attention_4_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338423168))))[name = string("constexpr_blockwise_shift_scale_15")]; + tensor attention_4_qkvproj_bias_0 = const()[name = string("attention_4_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338425536)))]; + tensor attention_4_qkvproj_strides_0 = const()[name = string("attention_4_qkvproj_strides_0"), val = tensor([1])]; + string attention_4_qkvproj_pad_type_0 = const()[name = string("attention_4_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_4_qkvproj_pad_0 = const()[name = string("attention_4_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_4_qkvproj_dilations_0 = const()[name = string("attention_4_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_4_qkvproj_groups_0 = const()[name = string("attention_4_qkvproj_groups_0"), val = int32(1)]; + tensor attention_4_qkvproj = conv(bias = attention_4_qkvproj_bias_0, dilations = attention_4_qkvproj_dilations_0, groups = attention_4_qkvproj_groups_0, pad = attention_4_qkvproj_pad_0, pad_type = attention_4_qkvproj_pad_type_0, strides = attention_4_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_15, x = block_4_attention_rmsnorm)[name = string("attention_4_qkvproj")]; + tensor attention_4_head_reshape_shape_0 = const()[name = string("attention_4_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_4_head_reshape = reshape(shape = attention_4_head_reshape_shape_0, x = attention_4_qkvproj)[name = string("attention_4_head_reshape")]; + tensor attention_4_head_transpose_perm_0 = const()[name = string("attention_4_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_4_split_qkv_heads_axis_0 = const()[name = string("attention_4_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_4_split_qkv_heads_split_sizes_0 = const()[name = string("attention_4_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_4_head_transpose = transpose(perm = attention_4_head_transpose_perm_0, x = attention_4_head_reshape)[name = string("transpose_40")]; + tensor attention_4_split_qkv_heads_0, tensor attention_4_split_qkv_heads_1, tensor attention_4_split_qkv_heads_2 = split(axis = attention_4_split_qkv_heads_axis_0, split_sizes = attention_4_split_qkv_heads_split_sizes_0, x = attention_4_head_transpose)[name = string("attention_4_split_qkv_heads")]; + tensor attention_4_q_rope_lhs_mult = mul(x = attention_4_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_4_q_rope_lhs_mult")]; + int32 attention_4_q_rotate_half_split_num_splits_0 = const()[name = string("attention_4_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_4_q_rotate_half_split_axis_0 = const()[name = string("attention_4_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_4_q_rotate_half_split_0, tensor attention_4_q_rotate_half_split_1 = split(axis = attention_4_q_rotate_half_split_axis_0, num_splits = attention_4_q_rotate_half_split_num_splits_0, x = attention_4_split_qkv_heads_0)[name = string("attention_4_q_rotate_half_split")]; + fp16 attention_4_q_rotate_half_neg_y_0 = const()[name = string("attention_4_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_4_q_rotate_half_neg = mul(x = attention_4_q_rotate_half_split_1, y = attention_4_q_rotate_half_neg_y_0)[name = string("attention_4_q_rotate_half_neg")]; + int32 attention_4_q_rotate_half_concat_axis_0 = const()[name = string("attention_4_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_4_q_rotate_half_concat_interleave_0 = const()[name = string("attention_4_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_4_q_rotate_half_concat = concat(axis = attention_4_q_rotate_half_concat_axis_0, interleave = attention_4_q_rotate_half_concat_interleave_0, values = (attention_4_q_rotate_half_neg, attention_4_q_rotate_half_split_0))[name = string("attention_4_q_rotate_half_concat")]; + tensor attention_4_q_rope_rhs_mult = mul(x = attention_4_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_4_q_rope_rhs_mult")]; + tensor attention_4_q_rope = add(x = attention_4_q_rope_lhs_mult, y = attention_4_q_rope_rhs_mult)[name = string("attention_4_q_rope")]; + tensor attention_4_k_rope_lhs_mult = mul(x = attention_4_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_4_k_rope_lhs_mult")]; + int32 attention_4_k_rotate_half_split_num_splits_0 = const()[name = string("attention_4_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_4_k_rotate_half_split_axis_0 = const()[name = string("attention_4_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_4_k_rotate_half_split_0, tensor attention_4_k_rotate_half_split_1 = split(axis = attention_4_k_rotate_half_split_axis_0, num_splits = attention_4_k_rotate_half_split_num_splits_0, x = attention_4_split_qkv_heads_1)[name = string("attention_4_k_rotate_half_split")]; + fp16 attention_4_k_rotate_half_neg_y_0 = const()[name = string("attention_4_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_4_k_rotate_half_neg = mul(x = attention_4_k_rotate_half_split_1, y = attention_4_k_rotate_half_neg_y_0)[name = string("attention_4_k_rotate_half_neg")]; + int32 attention_4_k_rotate_half_concat_axis_0 = const()[name = string("attention_4_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_4_k_rotate_half_concat_interleave_0 = const()[name = string("attention_4_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_4_k_rotate_half_concat = concat(axis = attention_4_k_rotate_half_concat_axis_0, interleave = attention_4_k_rotate_half_concat_interleave_0, values = (attention_4_k_rotate_half_neg, attention_4_k_rotate_half_split_0))[name = string("attention_4_k_rotate_half_concat")]; + tensor attention_4_k_rope_rhs_mult = mul(x = attention_4_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_4_k_rope_rhs_mult")]; + tensor attention_4_k_rope = add(x = attention_4_k_rope_lhs_mult, y = attention_4_k_rope_rhs_mult)[name = string("attention_4_k_rope")]; + int32 attention_4_q_splits_axis_0 = const()[name = string("attention_4_q_splits_axis_0"), val = int32(1)]; + int32 attention_4_q_splits_num_splits_0 = const()[name = string("attention_4_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_4_q_splits_0, tensor attention_4_q_splits_1 = split(axis = attention_4_q_splits_axis_0, num_splits = attention_4_q_splits_num_splits_0, x = attention_4_q_rope)[name = string("attention_4_q_splits")]; + tensor attention_4_update_begin_0_values0_0 = const()[name = string("attention_4_update_begin_0_values0_0"), val = tensor([4])]; + tensor attention_4_update_begin_0_values1_0 = const()[name = string("attention_4_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_4_update_begin_0_values3_0 = const()[name = string("attention_4_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_4_update_begin_0_axis_0 = const()[name = string("attention_4_update_begin_0_axis_0"), val = int32(0)]; + bool attention_4_update_begin_0_interleave_0 = const()[name = string("attention_4_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_4_update_begin_0 = concat(axis = attention_4_update_begin_0_axis_0, interleave = attention_4_update_begin_0_interleave_0, values = (attention_4_update_begin_0_values0_0, attention_4_update_begin_0_values1_0, query_pos1, attention_4_update_begin_0_values3_0))[name = string("attention_4_update_begin_0")]; + tensor attention_4_update_end_0_values0_0 = const()[name = string("attention_4_update_end_0_values0_0"), val = tensor([5])]; + tensor attention_4_update_end_0_values1_0 = const()[name = string("attention_4_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_4_update_end_0_values3_0 = const()[name = string("attention_4_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_4_update_end_0_axis_0 = const()[name = string("attention_4_update_end_0_axis_0"), val = int32(0)]; + bool attention_4_update_end_0_interleave_0 = const()[name = string("attention_4_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_4_update_end_0 = concat(axis = attention_4_update_end_0_axis_0, interleave = attention_4_update_end_0_interleave_0, values = (attention_4_update_end_0_values0_0, attention_4_update_end_0_values1_0, end_pos_0, attention_4_update_end_0_values3_0))[name = string("attention_4_update_end_0")]; + tensor attention_4_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_4_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_4_updated_key_cache_0 = slice_update(begin = attention_4_update_begin_0, end = attention_4_update_end_0, squeeze_mask = attention_4_updated_key_cache_0_squeeze_mask_0, update = attention_4_k_rope, x = coreml_update_state_6)[name = string("attention_4_updated_key_cache_0")]; + write_state(data = attention_4_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_8_write_state")]; + tensor coreml_update_state_8 = read_state(input = key_cache_state)[name = string("coreml_update_state_8")]; + tensor attention_4_key_cache_begin_0 = const()[name = string("attention_4_key_cache_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor attention_4_key_cache_end_0 = const()[name = string("attention_4_key_cache_end_0"), val = tensor([5, 2, 512, 64])]; + tensor attention_4_key_cache_squeeze_mask_0 = const()[name = string("attention_4_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_4_key_cache = slice_by_index(begin = attention_4_key_cache_begin_0, end = attention_4_key_cache_end_0, squeeze_mask = attention_4_key_cache_squeeze_mask_0, x = coreml_update_state_8)[name = string("attention_4_key_cache")]; + int32 attention_4_key_cache_head_axis_0 = const()[name = string("attention_4_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_4_key_cache_head_num_splits_0 = const()[name = string("attention_4_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_4_key_cache_head_0, tensor attention_4_key_cache_head_1 = split(axis = attention_4_key_cache_head_axis_0, num_splits = attention_4_key_cache_head_num_splits_0, x = attention_4_key_cache)[name = string("attention_4_key_cache_head")]; + tensor attention_4_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_4_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_4_updated_value_cache_0 = slice_update(begin = attention_4_update_begin_0, end = attention_4_update_end_0, squeeze_mask = attention_4_updated_value_cache_0_squeeze_mask_0, update = attention_4_split_qkv_heads_2, x = coreml_update_state_7)[name = string("attention_4_updated_value_cache_0")]; + write_state(data = attention_4_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_9_write_state")]; + tensor coreml_update_state_9 = read_state(input = value_cache_state)[name = string("coreml_update_state_9")]; + tensor attention_4_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_4_slice_current_layer_value_cache_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor attention_4_slice_current_layer_value_cache_end_0 = const()[name = string("attention_4_slice_current_layer_value_cache_end_0"), val = tensor([5, 2, 512, 64])]; + tensor attention_4_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_4_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_4_slice_current_layer_value_cache = slice_by_index(begin = attention_4_slice_current_layer_value_cache_begin_0, end = attention_4_slice_current_layer_value_cache_end_0, squeeze_mask = attention_4_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_9)[name = string("attention_4_slice_current_layer_value_cache")]; + int32 attention_4_slice_value_cache_heads_axis_0 = const()[name = string("attention_4_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_4_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_4_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_4_slice_value_cache_heads_0, tensor attention_4_slice_value_cache_heads_1 = split(axis = attention_4_slice_value_cache_heads_axis_0, num_splits = attention_4_slice_value_cache_heads_num_splits_0, x = attention_4_slice_current_layer_value_cache)[name = string("attention_4_slice_value_cache_heads")]; + bool attention_4_scores_0_transpose_y_0 = const()[name = string("attention_4_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_4_scores_0_transpose_x_0 = const()[name = string("attention_4_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_4_scores_0 = matmul(transpose_x = attention_4_scores_0_transpose_x_0, transpose_y = attention_4_scores_0_transpose_y_0, x = attention_4_key_cache_head_0, y = attention_4_q_splits_0)[name = string("attention_4_scores_0")]; + fp16 attention_4_scaled_scores_0_y_0 = const()[name = string("attention_4_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_4_scaled_scores_0 = mul(x = attention_4_scores_0, y = attention_4_scaled_scores_0_y_0)[name = string("attention_4_scaled_scores_0")]; + tensor attention_4_masked_scaled_scores_0 = add(x = attention_4_scaled_scores_0, y = transpose_0)[name = string("attention_4_masked_scaled_scores_0")]; + int32 softmax_8_axis_0 = const()[name = string("softmax_8_axis_0"), val = int32(-2)]; + tensor softmax_8 = softmax(axis = softmax_8_axis_0, x = attention_4_masked_scaled_scores_0)[name = string("softmax_8")]; + bool attention_4_attention_0_transpose_x_0 = const()[name = string("attention_4_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_4_attention_0_transpose_y_0 = const()[name = string("attention_4_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_4_attention_0 = matmul(transpose_x = attention_4_attention_0_transpose_x_0, transpose_y = attention_4_attention_0_transpose_y_0, x = softmax_8, y = attention_4_slice_value_cache_heads_0)[name = string("attention_4_attention_0")]; + bool attention_4_scores_1_transpose_y_0 = const()[name = string("attention_4_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_4_scores_1_transpose_x_0 = const()[name = string("attention_4_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_4_scores_1 = matmul(transpose_x = attention_4_scores_1_transpose_x_0, transpose_y = attention_4_scores_1_transpose_y_0, x = attention_4_key_cache_head_1, y = attention_4_q_splits_1)[name = string("attention_4_scores_1")]; + fp16 attention_4_scaled_scores_1_y_0 = const()[name = string("attention_4_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_4_scaled_scores_1 = mul(x = attention_4_scores_1, y = attention_4_scaled_scores_1_y_0)[name = string("attention_4_scaled_scores_1")]; + tensor attention_4_masked_scaled_scores_1 = add(x = attention_4_scaled_scores_1, y = transpose_0)[name = string("attention_4_masked_scaled_scores_1")]; + int32 softmax_9_axis_0 = const()[name = string("softmax_9_axis_0"), val = int32(-2)]; + tensor softmax_9 = softmax(axis = softmax_9_axis_0, x = attention_4_masked_scaled_scores_1)[name = string("softmax_9")]; + bool attention_4_attention_1_transpose_x_0 = const()[name = string("attention_4_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_4_attention_1_transpose_y_0 = const()[name = string("attention_4_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_4_attention_1 = matmul(transpose_x = attention_4_attention_1_transpose_x_0, transpose_y = attention_4_attention_1_transpose_y_0, x = softmax_9, y = attention_4_slice_value_cache_heads_1)[name = string("attention_4_attention_1")]; + int32 attention_4_concat_attention_all_heads_axis_0 = const()[name = string("attention_4_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_4_concat_attention_all_heads_interleave_0 = const()[name = string("attention_4_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_4_concat_attention_all_heads = concat(axis = attention_4_concat_attention_all_heads_axis_0, interleave = attention_4_concat_attention_all_heads_interleave_0, values = (attention_4_attention_0, attention_4_attention_1))[name = string("attention_4_concat_attention_all_heads")]; + tensor attention_4_channels_first_retransposed_perm_0 = const()[name = string("attention_4_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_4_reshaped_shape_0 = const()[name = string("attention_4_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_4_channels_first_retransposed = transpose(perm = attention_4_channels_first_retransposed_perm_0, x = attention_4_concat_attention_all_heads)[name = string("transpose_39")]; + tensor attention_4_reshaped = reshape(shape = attention_4_reshaped_shape_0, x = attention_4_channels_first_retransposed)[name = string("attention_4_reshaped")]; + tensor attention_4_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338427904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339030080))))[name = string("attention_4_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_16 = constexpr_blockwise_shift_scale(data = attention_4_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339058816))))[name = string("constexpr_blockwise_shift_scale_16")]; + tensor attention_4_outproj_strides_0 = const()[name = string("attention_4_outproj_strides_0"), val = tensor([1])]; + string attention_4_outproj_pad_type_0 = const()[name = string("attention_4_outproj_pad_type_0"), val = string("valid")]; + tensor attention_4_outproj_pad_0 = const()[name = string("attention_4_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_4_outproj_dilations_0 = const()[name = string("attention_4_outproj_dilations_0"), val = tensor([1])]; + int32 attention_4_outproj_groups_0 = const()[name = string("attention_4_outproj_groups_0"), val = int32(1)]; + tensor attention_4_outproj = conv(dilations = attention_4_outproj_dilations_0, groups = attention_4_outproj_groups_0, pad = attention_4_outproj_pad_0, pad_type = attention_4_outproj_pad_type_0, strides = attention_4_outproj_strides_0, weight = constexpr_blockwise_shift_scale_16, x = attention_4_reshaped)[name = string("attention_4_outproj")]; + tensor block_4_residual_1 = add(x = block_3_residual_2, y = attention_4_outproj)[name = string("block_4_residual_1")]; + tensor block_4_ffn_rmsnorm_abs = abs(x = block_4_residual_1)[name = string("block_4_ffn_rmsnorm_abs")]; + tensor block_4_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_4_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_4_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_4_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_4_ffn_rmsnorm_maxval = reduce_max(axes = block_4_ffn_rmsnorm_maxval_axes_0, keep_dims = block_4_ffn_rmsnorm_maxval_keep_dims_0, x = block_4_ffn_rmsnorm_abs)[name = string("block_4_ffn_rmsnorm_maxval")]; + fp16 block_4_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_4_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_4_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_4_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_4_ffn_rmsnorm_maxval_clipped = clip(alpha = block_4_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_4_ffn_rmsnorm_maxval_clipped_beta_0, x = block_4_ffn_rmsnorm_maxval)[name = string("block_4_ffn_rmsnorm_maxval_clipped")]; + tensor block_4_ffn_rmsnorm_scaled = real_div(x = block_4_residual_1, y = block_4_ffn_rmsnorm_maxval_clipped)[name = string("block_4_ffn_rmsnorm_scaled")]; + tensor block_4_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_4_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_4_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_4_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_4_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_4_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_4_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_4_ffn_rmsnorm_scaled)[name = string("block_4_ffn_rmsnorm_squared_sum")]; + fp16 block_4_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_4_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_4_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_4_ffn_rmsnorm_rsqrt_epsilon_0, x = block_4_ffn_rmsnorm_squared_sum)[name = string("block_4_ffn_rmsnorm_rsqrt")]; + fp16 block_4_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_4_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_4_ffn_rmsnorm_dim_scaled = mul(x = block_4_ffn_rmsnorm_scaled, y = block_4_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_4_ffn_rmsnorm_dim_scaled")]; + tensor block_4_ffn_rmsnorm_normalized = mul(x = block_4_ffn_rmsnorm_dim_scaled, y = block_4_ffn_rmsnorm_rsqrt)[name = string("block_4_ffn_rmsnorm_normalized")]; + tensor block_4_ffn_rmsnorm_y_0 = const()[name = string("block_4_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339060672)))]; + tensor block_4_ffn_rmsnorm = mul(x = block_4_ffn_rmsnorm_normalized, y = block_4_ffn_rmsnorm_y_0)[name = string("block_4_ffn_rmsnorm")]; + tensor block_4_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339062528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342331200))))[name = string("block_4_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_17 = constexpr_blockwise_shift_scale(data = block_4_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342486912))))[name = string("constexpr_blockwise_shift_scale_17")]; + tensor block_4_ffn_inproj_strides_0 = const()[name = string("block_4_ffn_inproj_strides_0"), val = tensor([1])]; + string block_4_ffn_inproj_pad_type_0 = const()[name = string("block_4_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_4_ffn_inproj_pad_0 = const()[name = string("block_4_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_4_ffn_inproj_dilations_0 = const()[name = string("block_4_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_4_ffn_inproj_groups_0 = const()[name = string("block_4_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_4_ffn_inproj = conv(dilations = block_4_ffn_inproj_dilations_0, groups = block_4_ffn_inproj_groups_0, pad = block_4_ffn_inproj_pad_0, pad_type = block_4_ffn_inproj_pad_type_0, strides = block_4_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_17, x = block_4_ffn_rmsnorm)[name = string("block_4_ffn_inproj")]; + tensor block_4_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342496704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345765376))))[name = string("block_4_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_18 = constexpr_blockwise_shift_scale(data = block_4_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345921088))))[name = string("constexpr_blockwise_shift_scale_18")]; + tensor block_4_ffn_g_strides_0 = const()[name = string("block_4_ffn_g_strides_0"), val = tensor([1])]; + string block_4_ffn_g_pad_type_0 = const()[name = string("block_4_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_4_ffn_g_pad_0 = const()[name = string("block_4_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_4_ffn_g_dilations_0 = const()[name = string("block_4_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_4_ffn_g_groups_0 = const()[name = string("block_4_ffn_g_groups_0"), val = int32(1)]; + tensor block_4_ffn_g = conv(dilations = block_4_ffn_g_dilations_0, groups = block_4_ffn_g_groups_0, pad = block_4_ffn_g_pad_0, pad_type = block_4_ffn_g_pad_type_0, strides = block_4_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_18, x = block_4_ffn_rmsnorm)[name = string("block_4_ffn_g")]; + tensor block_4_ffn_g_activation = silu(x = block_4_ffn_g)[name = string("block_4_ffn_g_activation")]; + tensor block_4_ffn_x_gated = mul(x = block_4_ffn_inproj, y = block_4_ffn_g_activation)[name = string("block_4_ffn_x_gated")]; + tensor block_4_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345930880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349199552))))[name = string("block_4_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_19 = constexpr_blockwise_shift_scale(data = block_4_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349228288))))[name = string("constexpr_blockwise_shift_scale_19")]; + tensor block_4_ffn_outproj_strides_0 = const()[name = string("block_4_ffn_outproj_strides_0"), val = tensor([1])]; + string block_4_ffn_outproj_pad_type_0 = const()[name = string("block_4_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_4_ffn_outproj_pad_0 = const()[name = string("block_4_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_4_ffn_outproj_dilations_0 = const()[name = string("block_4_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_4_ffn_outproj_groups_0 = const()[name = string("block_4_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_4_ffn_outproj = conv(dilations = block_4_ffn_outproj_dilations_0, groups = block_4_ffn_outproj_groups_0, pad = block_4_ffn_outproj_pad_0, pad_type = block_4_ffn_outproj_pad_type_0, strides = block_4_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_19, x = block_4_ffn_x_gated)[name = string("block_4_ffn_outproj")]; + tensor block_4_residual_2 = add(x = block_4_ffn_outproj, y = block_4_residual_1)[name = string("block_4_residual_2")]; + tensor block_5_attention_rmsnorm_abs = abs(x = block_4_residual_2)[name = string("block_5_attention_rmsnorm_abs")]; + tensor block_5_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_5_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_5_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_5_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_5_attention_rmsnorm_maxval = reduce_max(axes = block_5_attention_rmsnorm_maxval_axes_0, keep_dims = block_5_attention_rmsnorm_maxval_keep_dims_0, x = block_5_attention_rmsnorm_abs)[name = string("block_5_attention_rmsnorm_maxval")]; + fp16 block_5_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_5_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_5_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_5_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_5_attention_rmsnorm_maxval_clipped = clip(alpha = block_5_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_5_attention_rmsnorm_maxval_clipped_beta_0, x = block_5_attention_rmsnorm_maxval)[name = string("block_5_attention_rmsnorm_maxval_clipped")]; + tensor block_5_attention_rmsnorm_scaled = real_div(x = block_4_residual_2, y = block_5_attention_rmsnorm_maxval_clipped)[name = string("block_5_attention_rmsnorm_scaled")]; + tensor block_5_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_5_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_5_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_5_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_5_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_5_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_5_attention_rmsnorm_squared_sum_keep_dims_0, x = block_5_attention_rmsnorm_scaled)[name = string("block_5_attention_rmsnorm_squared_sum")]; + fp16 block_5_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_5_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_5_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_5_attention_rmsnorm_rsqrt_epsilon_0, x = block_5_attention_rmsnorm_squared_sum)[name = string("block_5_attention_rmsnorm_rsqrt")]; + fp16 block_5_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_5_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_5_attention_rmsnorm_dim_scaled = mul(x = block_5_attention_rmsnorm_scaled, y = block_5_attention_rmsnorm_dim_scaled_y_0)[name = string("block_5_attention_rmsnorm_dim_scaled")]; + tensor block_5_attention_rmsnorm_normalized = mul(x = block_5_attention_rmsnorm_dim_scaled, y = block_5_attention_rmsnorm_rsqrt)[name = string("block_5_attention_rmsnorm_normalized")]; + tensor block_5_attention_rmsnorm_y_0 = const()[name = string("block_5_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349230144)))]; + tensor block_5_attention_rmsnorm = mul(x = block_5_attention_rmsnorm_normalized, y = block_5_attention_rmsnorm_y_0)[name = string("block_5_attention_rmsnorm")]; + tensor attention_5_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349232000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350006208))))[name = string("attention_5_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_20 = constexpr_blockwise_shift_scale(data = attention_5_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350043136))))[name = string("constexpr_blockwise_shift_scale_20")]; + tensor attention_5_qkvproj_bias_0 = const()[name = string("attention_5_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350045504)))]; + tensor attention_5_qkvproj_strides_0 = const()[name = string("attention_5_qkvproj_strides_0"), val = tensor([1])]; + string attention_5_qkvproj_pad_type_0 = const()[name = string("attention_5_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_5_qkvproj_pad_0 = const()[name = string("attention_5_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_5_qkvproj_dilations_0 = const()[name = string("attention_5_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_5_qkvproj_groups_0 = const()[name = string("attention_5_qkvproj_groups_0"), val = int32(1)]; + tensor attention_5_qkvproj = conv(bias = attention_5_qkvproj_bias_0, dilations = attention_5_qkvproj_dilations_0, groups = attention_5_qkvproj_groups_0, pad = attention_5_qkvproj_pad_0, pad_type = attention_5_qkvproj_pad_type_0, strides = attention_5_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_20, x = block_5_attention_rmsnorm)[name = string("attention_5_qkvproj")]; + tensor attention_5_head_reshape_shape_0 = const()[name = string("attention_5_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_5_head_reshape = reshape(shape = attention_5_head_reshape_shape_0, x = attention_5_qkvproj)[name = string("attention_5_head_reshape")]; + tensor attention_5_head_transpose_perm_0 = const()[name = string("attention_5_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_5_split_qkv_heads_axis_0 = const()[name = string("attention_5_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_5_split_qkv_heads_split_sizes_0 = const()[name = string("attention_5_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_5_head_transpose = transpose(perm = attention_5_head_transpose_perm_0, x = attention_5_head_reshape)[name = string("transpose_38")]; + tensor attention_5_split_qkv_heads_0, tensor attention_5_split_qkv_heads_1, tensor attention_5_split_qkv_heads_2 = split(axis = attention_5_split_qkv_heads_axis_0, split_sizes = attention_5_split_qkv_heads_split_sizes_0, x = attention_5_head_transpose)[name = string("attention_5_split_qkv_heads")]; + tensor attention_5_q_rope_lhs_mult = mul(x = attention_5_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_5_q_rope_lhs_mult")]; + int32 attention_5_q_rotate_half_split_num_splits_0 = const()[name = string("attention_5_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_5_q_rotate_half_split_axis_0 = const()[name = string("attention_5_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_5_q_rotate_half_split_0, tensor attention_5_q_rotate_half_split_1 = split(axis = attention_5_q_rotate_half_split_axis_0, num_splits = attention_5_q_rotate_half_split_num_splits_0, x = attention_5_split_qkv_heads_0)[name = string("attention_5_q_rotate_half_split")]; + fp16 attention_5_q_rotate_half_neg_y_0 = const()[name = string("attention_5_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_5_q_rotate_half_neg = mul(x = attention_5_q_rotate_half_split_1, y = attention_5_q_rotate_half_neg_y_0)[name = string("attention_5_q_rotate_half_neg")]; + int32 attention_5_q_rotate_half_concat_axis_0 = const()[name = string("attention_5_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_5_q_rotate_half_concat_interleave_0 = const()[name = string("attention_5_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_5_q_rotate_half_concat = concat(axis = attention_5_q_rotate_half_concat_axis_0, interleave = attention_5_q_rotate_half_concat_interleave_0, values = (attention_5_q_rotate_half_neg, attention_5_q_rotate_half_split_0))[name = string("attention_5_q_rotate_half_concat")]; + tensor attention_5_q_rope_rhs_mult = mul(x = attention_5_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_5_q_rope_rhs_mult")]; + tensor attention_5_q_rope = add(x = attention_5_q_rope_lhs_mult, y = attention_5_q_rope_rhs_mult)[name = string("attention_5_q_rope")]; + tensor attention_5_k_rope_lhs_mult = mul(x = attention_5_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_5_k_rope_lhs_mult")]; + int32 attention_5_k_rotate_half_split_num_splits_0 = const()[name = string("attention_5_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_5_k_rotate_half_split_axis_0 = const()[name = string("attention_5_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_5_k_rotate_half_split_0, tensor attention_5_k_rotate_half_split_1 = split(axis = attention_5_k_rotate_half_split_axis_0, num_splits = attention_5_k_rotate_half_split_num_splits_0, x = attention_5_split_qkv_heads_1)[name = string("attention_5_k_rotate_half_split")]; + fp16 attention_5_k_rotate_half_neg_y_0 = const()[name = string("attention_5_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_5_k_rotate_half_neg = mul(x = attention_5_k_rotate_half_split_1, y = attention_5_k_rotate_half_neg_y_0)[name = string("attention_5_k_rotate_half_neg")]; + int32 attention_5_k_rotate_half_concat_axis_0 = const()[name = string("attention_5_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_5_k_rotate_half_concat_interleave_0 = const()[name = string("attention_5_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_5_k_rotate_half_concat = concat(axis = attention_5_k_rotate_half_concat_axis_0, interleave = attention_5_k_rotate_half_concat_interleave_0, values = (attention_5_k_rotate_half_neg, attention_5_k_rotate_half_split_0))[name = string("attention_5_k_rotate_half_concat")]; + tensor attention_5_k_rope_rhs_mult = mul(x = attention_5_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_5_k_rope_rhs_mult")]; + tensor attention_5_k_rope = add(x = attention_5_k_rope_lhs_mult, y = attention_5_k_rope_rhs_mult)[name = string("attention_5_k_rope")]; + int32 attention_5_q_splits_axis_0 = const()[name = string("attention_5_q_splits_axis_0"), val = int32(1)]; + int32 attention_5_q_splits_num_splits_0 = const()[name = string("attention_5_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_5_q_splits_0, tensor attention_5_q_splits_1 = split(axis = attention_5_q_splits_axis_0, num_splits = attention_5_q_splits_num_splits_0, x = attention_5_q_rope)[name = string("attention_5_q_splits")]; + tensor attention_5_update_begin_0_values0_0 = const()[name = string("attention_5_update_begin_0_values0_0"), val = tensor([5])]; + tensor attention_5_update_begin_0_values1_0 = const()[name = string("attention_5_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_5_update_begin_0_values3_0 = const()[name = string("attention_5_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_5_update_begin_0_axis_0 = const()[name = string("attention_5_update_begin_0_axis_0"), val = int32(0)]; + bool attention_5_update_begin_0_interleave_0 = const()[name = string("attention_5_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_5_update_begin_0 = concat(axis = attention_5_update_begin_0_axis_0, interleave = attention_5_update_begin_0_interleave_0, values = (attention_5_update_begin_0_values0_0, attention_5_update_begin_0_values1_0, query_pos1, attention_5_update_begin_0_values3_0))[name = string("attention_5_update_begin_0")]; + tensor attention_5_update_end_0_values0_0 = const()[name = string("attention_5_update_end_0_values0_0"), val = tensor([6])]; + tensor attention_5_update_end_0_values1_0 = const()[name = string("attention_5_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_5_update_end_0_values3_0 = const()[name = string("attention_5_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_5_update_end_0_axis_0 = const()[name = string("attention_5_update_end_0_axis_0"), val = int32(0)]; + bool attention_5_update_end_0_interleave_0 = const()[name = string("attention_5_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_5_update_end_0 = concat(axis = attention_5_update_end_0_axis_0, interleave = attention_5_update_end_0_interleave_0, values = (attention_5_update_end_0_values0_0, attention_5_update_end_0_values1_0, end_pos_0, attention_5_update_end_0_values3_0))[name = string("attention_5_update_end_0")]; + tensor attention_5_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_5_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_5_updated_key_cache_0 = slice_update(begin = attention_5_update_begin_0, end = attention_5_update_end_0, squeeze_mask = attention_5_updated_key_cache_0_squeeze_mask_0, update = attention_5_k_rope, x = coreml_update_state_8)[name = string("attention_5_updated_key_cache_0")]; + write_state(data = attention_5_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_10_write_state")]; + tensor coreml_update_state_10 = read_state(input = key_cache_state)[name = string("coreml_update_state_10")]; + tensor attention_5_key_cache_begin_0 = const()[name = string("attention_5_key_cache_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor attention_5_key_cache_end_0 = const()[name = string("attention_5_key_cache_end_0"), val = tensor([6, 2, 512, 64])]; + tensor attention_5_key_cache_squeeze_mask_0 = const()[name = string("attention_5_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_5_key_cache = slice_by_index(begin = attention_5_key_cache_begin_0, end = attention_5_key_cache_end_0, squeeze_mask = attention_5_key_cache_squeeze_mask_0, x = coreml_update_state_10)[name = string("attention_5_key_cache")]; + int32 attention_5_key_cache_head_axis_0 = const()[name = string("attention_5_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_5_key_cache_head_num_splits_0 = const()[name = string("attention_5_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_5_key_cache_head_0, tensor attention_5_key_cache_head_1 = split(axis = attention_5_key_cache_head_axis_0, num_splits = attention_5_key_cache_head_num_splits_0, x = attention_5_key_cache)[name = string("attention_5_key_cache_head")]; + tensor attention_5_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_5_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_5_updated_value_cache_0 = slice_update(begin = attention_5_update_begin_0, end = attention_5_update_end_0, squeeze_mask = attention_5_updated_value_cache_0_squeeze_mask_0, update = attention_5_split_qkv_heads_2, x = coreml_update_state_9)[name = string("attention_5_updated_value_cache_0")]; + write_state(data = attention_5_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_11_write_state")]; + tensor coreml_update_state_11 = read_state(input = value_cache_state)[name = string("coreml_update_state_11")]; + tensor attention_5_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_5_slice_current_layer_value_cache_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor attention_5_slice_current_layer_value_cache_end_0 = const()[name = string("attention_5_slice_current_layer_value_cache_end_0"), val = tensor([6, 2, 512, 64])]; + tensor attention_5_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_5_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_5_slice_current_layer_value_cache = slice_by_index(begin = attention_5_slice_current_layer_value_cache_begin_0, end = attention_5_slice_current_layer_value_cache_end_0, squeeze_mask = attention_5_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_11)[name = string("attention_5_slice_current_layer_value_cache")]; + int32 attention_5_slice_value_cache_heads_axis_0 = const()[name = string("attention_5_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_5_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_5_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_5_slice_value_cache_heads_0, tensor attention_5_slice_value_cache_heads_1 = split(axis = attention_5_slice_value_cache_heads_axis_0, num_splits = attention_5_slice_value_cache_heads_num_splits_0, x = attention_5_slice_current_layer_value_cache)[name = string("attention_5_slice_value_cache_heads")]; + bool attention_5_scores_0_transpose_y_0 = const()[name = string("attention_5_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_5_scores_0_transpose_x_0 = const()[name = string("attention_5_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_5_scores_0 = matmul(transpose_x = attention_5_scores_0_transpose_x_0, transpose_y = attention_5_scores_0_transpose_y_0, x = attention_5_key_cache_head_0, y = attention_5_q_splits_0)[name = string("attention_5_scores_0")]; + fp16 attention_5_scaled_scores_0_y_0 = const()[name = string("attention_5_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_5_scaled_scores_0 = mul(x = attention_5_scores_0, y = attention_5_scaled_scores_0_y_0)[name = string("attention_5_scaled_scores_0")]; + tensor attention_5_masked_scaled_scores_0 = add(x = attention_5_scaled_scores_0, y = transpose_0)[name = string("attention_5_masked_scaled_scores_0")]; + int32 softmax_10_axis_0 = const()[name = string("softmax_10_axis_0"), val = int32(-2)]; + tensor softmax_10 = softmax(axis = softmax_10_axis_0, x = attention_5_masked_scaled_scores_0)[name = string("softmax_10")]; + bool attention_5_attention_0_transpose_x_0 = const()[name = string("attention_5_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_5_attention_0_transpose_y_0 = const()[name = string("attention_5_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_5_attention_0 = matmul(transpose_x = attention_5_attention_0_transpose_x_0, transpose_y = attention_5_attention_0_transpose_y_0, x = softmax_10, y = attention_5_slice_value_cache_heads_0)[name = string("attention_5_attention_0")]; + bool attention_5_scores_1_transpose_y_0 = const()[name = string("attention_5_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_5_scores_1_transpose_x_0 = const()[name = string("attention_5_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_5_scores_1 = matmul(transpose_x = attention_5_scores_1_transpose_x_0, transpose_y = attention_5_scores_1_transpose_y_0, x = attention_5_key_cache_head_1, y = attention_5_q_splits_1)[name = string("attention_5_scores_1")]; + fp16 attention_5_scaled_scores_1_y_0 = const()[name = string("attention_5_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_5_scaled_scores_1 = mul(x = attention_5_scores_1, y = attention_5_scaled_scores_1_y_0)[name = string("attention_5_scaled_scores_1")]; + tensor attention_5_masked_scaled_scores_1 = add(x = attention_5_scaled_scores_1, y = transpose_0)[name = string("attention_5_masked_scaled_scores_1")]; + int32 softmax_11_axis_0 = const()[name = string("softmax_11_axis_0"), val = int32(-2)]; + tensor softmax_11 = softmax(axis = softmax_11_axis_0, x = attention_5_masked_scaled_scores_1)[name = string("softmax_11")]; + bool attention_5_attention_1_transpose_x_0 = const()[name = string("attention_5_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_5_attention_1_transpose_y_0 = const()[name = string("attention_5_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_5_attention_1 = matmul(transpose_x = attention_5_attention_1_transpose_x_0, transpose_y = attention_5_attention_1_transpose_y_0, x = softmax_11, y = attention_5_slice_value_cache_heads_1)[name = string("attention_5_attention_1")]; + int32 attention_5_concat_attention_all_heads_axis_0 = const()[name = string("attention_5_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_5_concat_attention_all_heads_interleave_0 = const()[name = string("attention_5_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_5_concat_attention_all_heads = concat(axis = attention_5_concat_attention_all_heads_axis_0, interleave = attention_5_concat_attention_all_heads_interleave_0, values = (attention_5_attention_0, attention_5_attention_1))[name = string("attention_5_concat_attention_all_heads")]; + tensor attention_5_channels_first_retransposed_perm_0 = const()[name = string("attention_5_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_5_reshaped_shape_0 = const()[name = string("attention_5_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_5_channels_first_retransposed = transpose(perm = attention_5_channels_first_retransposed_perm_0, x = attention_5_concat_attention_all_heads)[name = string("transpose_37")]; + tensor attention_5_reshaped = reshape(shape = attention_5_reshaped_shape_0, x = attention_5_channels_first_retransposed)[name = string("attention_5_reshaped")]; + tensor attention_5_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350047872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350650048))))[name = string("attention_5_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_21 = constexpr_blockwise_shift_scale(data = attention_5_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350678784))))[name = string("constexpr_blockwise_shift_scale_21")]; + tensor attention_5_outproj_strides_0 = const()[name = string("attention_5_outproj_strides_0"), val = tensor([1])]; + string attention_5_outproj_pad_type_0 = const()[name = string("attention_5_outproj_pad_type_0"), val = string("valid")]; + tensor attention_5_outproj_pad_0 = const()[name = string("attention_5_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_5_outproj_dilations_0 = const()[name = string("attention_5_outproj_dilations_0"), val = tensor([1])]; + int32 attention_5_outproj_groups_0 = const()[name = string("attention_5_outproj_groups_0"), val = int32(1)]; + tensor attention_5_outproj = conv(dilations = attention_5_outproj_dilations_0, groups = attention_5_outproj_groups_0, pad = attention_5_outproj_pad_0, pad_type = attention_5_outproj_pad_type_0, strides = attention_5_outproj_strides_0, weight = constexpr_blockwise_shift_scale_21, x = attention_5_reshaped)[name = string("attention_5_outproj")]; + tensor block_5_residual_1 = add(x = block_4_residual_2, y = attention_5_outproj)[name = string("block_5_residual_1")]; + tensor block_5_ffn_rmsnorm_abs = abs(x = block_5_residual_1)[name = string("block_5_ffn_rmsnorm_abs")]; + tensor block_5_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_5_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_5_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_5_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_5_ffn_rmsnorm_maxval = reduce_max(axes = block_5_ffn_rmsnorm_maxval_axes_0, keep_dims = block_5_ffn_rmsnorm_maxval_keep_dims_0, x = block_5_ffn_rmsnorm_abs)[name = string("block_5_ffn_rmsnorm_maxval")]; + fp16 block_5_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_5_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_5_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_5_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_5_ffn_rmsnorm_maxval_clipped = clip(alpha = block_5_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_5_ffn_rmsnorm_maxval_clipped_beta_0, x = block_5_ffn_rmsnorm_maxval)[name = string("block_5_ffn_rmsnorm_maxval_clipped")]; + tensor block_5_ffn_rmsnorm_scaled = real_div(x = block_5_residual_1, y = block_5_ffn_rmsnorm_maxval_clipped)[name = string("block_5_ffn_rmsnorm_scaled")]; + tensor block_5_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_5_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_5_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_5_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_5_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_5_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_5_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_5_ffn_rmsnorm_scaled)[name = string("block_5_ffn_rmsnorm_squared_sum")]; + fp16 block_5_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_5_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_5_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_5_ffn_rmsnorm_rsqrt_epsilon_0, x = block_5_ffn_rmsnorm_squared_sum)[name = string("block_5_ffn_rmsnorm_rsqrt")]; + fp16 block_5_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_5_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_5_ffn_rmsnorm_dim_scaled = mul(x = block_5_ffn_rmsnorm_scaled, y = block_5_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_5_ffn_rmsnorm_dim_scaled")]; + tensor block_5_ffn_rmsnorm_normalized = mul(x = block_5_ffn_rmsnorm_dim_scaled, y = block_5_ffn_rmsnorm_rsqrt)[name = string("block_5_ffn_rmsnorm_normalized")]; + tensor block_5_ffn_rmsnorm_y_0 = const()[name = string("block_5_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350680640)))]; + tensor block_5_ffn_rmsnorm = mul(x = block_5_ffn_rmsnorm_normalized, y = block_5_ffn_rmsnorm_y_0)[name = string("block_5_ffn_rmsnorm")]; + tensor block_5_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350682496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353951168))))[name = string("block_5_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_22 = constexpr_blockwise_shift_scale(data = block_5_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354106880))))[name = string("constexpr_blockwise_shift_scale_22")]; + tensor block_5_ffn_inproj_strides_0 = const()[name = string("block_5_ffn_inproj_strides_0"), val = tensor([1])]; + string block_5_ffn_inproj_pad_type_0 = const()[name = string("block_5_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_5_ffn_inproj_pad_0 = const()[name = string("block_5_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_5_ffn_inproj_dilations_0 = const()[name = string("block_5_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_5_ffn_inproj_groups_0 = const()[name = string("block_5_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_5_ffn_inproj = conv(dilations = block_5_ffn_inproj_dilations_0, groups = block_5_ffn_inproj_groups_0, pad = block_5_ffn_inproj_pad_0, pad_type = block_5_ffn_inproj_pad_type_0, strides = block_5_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_22, x = block_5_ffn_rmsnorm)[name = string("block_5_ffn_inproj")]; + tensor block_5_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354116672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357385344))))[name = string("block_5_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_23 = constexpr_blockwise_shift_scale(data = block_5_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357541056))))[name = string("constexpr_blockwise_shift_scale_23")]; + tensor block_5_ffn_g_strides_0 = const()[name = string("block_5_ffn_g_strides_0"), val = tensor([1])]; + string block_5_ffn_g_pad_type_0 = const()[name = string("block_5_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_5_ffn_g_pad_0 = const()[name = string("block_5_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_5_ffn_g_dilations_0 = const()[name = string("block_5_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_5_ffn_g_groups_0 = const()[name = string("block_5_ffn_g_groups_0"), val = int32(1)]; + tensor block_5_ffn_g = conv(dilations = block_5_ffn_g_dilations_0, groups = block_5_ffn_g_groups_0, pad = block_5_ffn_g_pad_0, pad_type = block_5_ffn_g_pad_type_0, strides = block_5_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_23, x = block_5_ffn_rmsnorm)[name = string("block_5_ffn_g")]; + tensor block_5_ffn_g_activation = silu(x = block_5_ffn_g)[name = string("block_5_ffn_g_activation")]; + tensor block_5_ffn_x_gated = mul(x = block_5_ffn_inproj, y = block_5_ffn_g_activation)[name = string("block_5_ffn_x_gated")]; + tensor block_5_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357550848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360819520))))[name = string("block_5_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_24 = constexpr_blockwise_shift_scale(data = block_5_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360848256))))[name = string("constexpr_blockwise_shift_scale_24")]; + tensor block_5_ffn_outproj_strides_0 = const()[name = string("block_5_ffn_outproj_strides_0"), val = tensor([1])]; + string block_5_ffn_outproj_pad_type_0 = const()[name = string("block_5_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_5_ffn_outproj_pad_0 = const()[name = string("block_5_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_5_ffn_outproj_dilations_0 = const()[name = string("block_5_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_5_ffn_outproj_groups_0 = const()[name = string("block_5_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_5_ffn_outproj = conv(dilations = block_5_ffn_outproj_dilations_0, groups = block_5_ffn_outproj_groups_0, pad = block_5_ffn_outproj_pad_0, pad_type = block_5_ffn_outproj_pad_type_0, strides = block_5_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_24, x = block_5_ffn_x_gated)[name = string("block_5_ffn_outproj")]; + tensor block_5_residual_2 = add(x = block_5_ffn_outproj, y = block_5_residual_1)[name = string("block_5_residual_2")]; + tensor block_6_attention_rmsnorm_abs = abs(x = block_5_residual_2)[name = string("block_6_attention_rmsnorm_abs")]; + tensor block_6_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_6_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_6_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_6_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_6_attention_rmsnorm_maxval = reduce_max(axes = block_6_attention_rmsnorm_maxval_axes_0, keep_dims = block_6_attention_rmsnorm_maxval_keep_dims_0, x = block_6_attention_rmsnorm_abs)[name = string("block_6_attention_rmsnorm_maxval")]; + fp16 block_6_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_6_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_6_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_6_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_6_attention_rmsnorm_maxval_clipped = clip(alpha = block_6_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_6_attention_rmsnorm_maxval_clipped_beta_0, x = block_6_attention_rmsnorm_maxval)[name = string("block_6_attention_rmsnorm_maxval_clipped")]; + tensor block_6_attention_rmsnorm_scaled = real_div(x = block_5_residual_2, y = block_6_attention_rmsnorm_maxval_clipped)[name = string("block_6_attention_rmsnorm_scaled")]; + tensor block_6_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_6_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_6_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_6_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_6_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_6_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_6_attention_rmsnorm_squared_sum_keep_dims_0, x = block_6_attention_rmsnorm_scaled)[name = string("block_6_attention_rmsnorm_squared_sum")]; + fp16 block_6_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_6_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_6_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_6_attention_rmsnorm_rsqrt_epsilon_0, x = block_6_attention_rmsnorm_squared_sum)[name = string("block_6_attention_rmsnorm_rsqrt")]; + fp16 block_6_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_6_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_6_attention_rmsnorm_dim_scaled = mul(x = block_6_attention_rmsnorm_scaled, y = block_6_attention_rmsnorm_dim_scaled_y_0)[name = string("block_6_attention_rmsnorm_dim_scaled")]; + tensor block_6_attention_rmsnorm_normalized = mul(x = block_6_attention_rmsnorm_dim_scaled, y = block_6_attention_rmsnorm_rsqrt)[name = string("block_6_attention_rmsnorm_normalized")]; + tensor block_6_attention_rmsnorm_y_0 = const()[name = string("block_6_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360850112)))]; + tensor block_6_attention_rmsnorm = mul(x = block_6_attention_rmsnorm_normalized, y = block_6_attention_rmsnorm_y_0)[name = string("block_6_attention_rmsnorm")]; + tensor attention_6_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360851968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361626176))))[name = string("attention_6_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_25 = constexpr_blockwise_shift_scale(data = attention_6_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361663104))))[name = string("constexpr_blockwise_shift_scale_25")]; + tensor attention_6_qkvproj_bias_0 = const()[name = string("attention_6_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361665472)))]; + tensor attention_6_qkvproj_strides_0 = const()[name = string("attention_6_qkvproj_strides_0"), val = tensor([1])]; + string attention_6_qkvproj_pad_type_0 = const()[name = string("attention_6_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_6_qkvproj_pad_0 = const()[name = string("attention_6_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_6_qkvproj_dilations_0 = const()[name = string("attention_6_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_6_qkvproj_groups_0 = const()[name = string("attention_6_qkvproj_groups_0"), val = int32(1)]; + tensor attention_6_qkvproj = conv(bias = attention_6_qkvproj_bias_0, dilations = attention_6_qkvproj_dilations_0, groups = attention_6_qkvproj_groups_0, pad = attention_6_qkvproj_pad_0, pad_type = attention_6_qkvproj_pad_type_0, strides = attention_6_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_25, x = block_6_attention_rmsnorm)[name = string("attention_6_qkvproj")]; + tensor attention_6_head_reshape_shape_0 = const()[name = string("attention_6_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_6_head_reshape = reshape(shape = attention_6_head_reshape_shape_0, x = attention_6_qkvproj)[name = string("attention_6_head_reshape")]; + tensor attention_6_head_transpose_perm_0 = const()[name = string("attention_6_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_6_split_qkv_heads_axis_0 = const()[name = string("attention_6_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_6_split_qkv_heads_split_sizes_0 = const()[name = string("attention_6_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_6_head_transpose = transpose(perm = attention_6_head_transpose_perm_0, x = attention_6_head_reshape)[name = string("transpose_36")]; + tensor attention_6_split_qkv_heads_0, tensor attention_6_split_qkv_heads_1, tensor attention_6_split_qkv_heads_2 = split(axis = attention_6_split_qkv_heads_axis_0, split_sizes = attention_6_split_qkv_heads_split_sizes_0, x = attention_6_head_transpose)[name = string("attention_6_split_qkv_heads")]; + tensor attention_6_q_rope_lhs_mult = mul(x = attention_6_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_6_q_rope_lhs_mult")]; + int32 attention_6_q_rotate_half_split_num_splits_0 = const()[name = string("attention_6_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_6_q_rotate_half_split_axis_0 = const()[name = string("attention_6_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_6_q_rotate_half_split_0, tensor attention_6_q_rotate_half_split_1 = split(axis = attention_6_q_rotate_half_split_axis_0, num_splits = attention_6_q_rotate_half_split_num_splits_0, x = attention_6_split_qkv_heads_0)[name = string("attention_6_q_rotate_half_split")]; + fp16 attention_6_q_rotate_half_neg_y_0 = const()[name = string("attention_6_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_6_q_rotate_half_neg = mul(x = attention_6_q_rotate_half_split_1, y = attention_6_q_rotate_half_neg_y_0)[name = string("attention_6_q_rotate_half_neg")]; + int32 attention_6_q_rotate_half_concat_axis_0 = const()[name = string("attention_6_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_6_q_rotate_half_concat_interleave_0 = const()[name = string("attention_6_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_6_q_rotate_half_concat = concat(axis = attention_6_q_rotate_half_concat_axis_0, interleave = attention_6_q_rotate_half_concat_interleave_0, values = (attention_6_q_rotate_half_neg, attention_6_q_rotate_half_split_0))[name = string("attention_6_q_rotate_half_concat")]; + tensor attention_6_q_rope_rhs_mult = mul(x = attention_6_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_6_q_rope_rhs_mult")]; + tensor attention_6_q_rope = add(x = attention_6_q_rope_lhs_mult, y = attention_6_q_rope_rhs_mult)[name = string("attention_6_q_rope")]; + tensor attention_6_k_rope_lhs_mult = mul(x = attention_6_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_6_k_rope_lhs_mult")]; + int32 attention_6_k_rotate_half_split_num_splits_0 = const()[name = string("attention_6_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_6_k_rotate_half_split_axis_0 = const()[name = string("attention_6_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_6_k_rotate_half_split_0, tensor attention_6_k_rotate_half_split_1 = split(axis = attention_6_k_rotate_half_split_axis_0, num_splits = attention_6_k_rotate_half_split_num_splits_0, x = attention_6_split_qkv_heads_1)[name = string("attention_6_k_rotate_half_split")]; + fp16 attention_6_k_rotate_half_neg_y_0 = const()[name = string("attention_6_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_6_k_rotate_half_neg = mul(x = attention_6_k_rotate_half_split_1, y = attention_6_k_rotate_half_neg_y_0)[name = string("attention_6_k_rotate_half_neg")]; + int32 attention_6_k_rotate_half_concat_axis_0 = const()[name = string("attention_6_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_6_k_rotate_half_concat_interleave_0 = const()[name = string("attention_6_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_6_k_rotate_half_concat = concat(axis = attention_6_k_rotate_half_concat_axis_0, interleave = attention_6_k_rotate_half_concat_interleave_0, values = (attention_6_k_rotate_half_neg, attention_6_k_rotate_half_split_0))[name = string("attention_6_k_rotate_half_concat")]; + tensor attention_6_k_rope_rhs_mult = mul(x = attention_6_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_6_k_rope_rhs_mult")]; + tensor attention_6_k_rope = add(x = attention_6_k_rope_lhs_mult, y = attention_6_k_rope_rhs_mult)[name = string("attention_6_k_rope")]; + int32 attention_6_q_splits_axis_0 = const()[name = string("attention_6_q_splits_axis_0"), val = int32(1)]; + int32 attention_6_q_splits_num_splits_0 = const()[name = string("attention_6_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_6_q_splits_0, tensor attention_6_q_splits_1 = split(axis = attention_6_q_splits_axis_0, num_splits = attention_6_q_splits_num_splits_0, x = attention_6_q_rope)[name = string("attention_6_q_splits")]; + tensor attention_6_update_begin_0_values0_0 = const()[name = string("attention_6_update_begin_0_values0_0"), val = tensor([6])]; + tensor attention_6_update_begin_0_values1_0 = const()[name = string("attention_6_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_6_update_begin_0_values3_0 = const()[name = string("attention_6_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_6_update_begin_0_axis_0 = const()[name = string("attention_6_update_begin_0_axis_0"), val = int32(0)]; + bool attention_6_update_begin_0_interleave_0 = const()[name = string("attention_6_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_6_update_begin_0 = concat(axis = attention_6_update_begin_0_axis_0, interleave = attention_6_update_begin_0_interleave_0, values = (attention_6_update_begin_0_values0_0, attention_6_update_begin_0_values1_0, query_pos1, attention_6_update_begin_0_values3_0))[name = string("attention_6_update_begin_0")]; + tensor attention_6_update_end_0_values0_0 = const()[name = string("attention_6_update_end_0_values0_0"), val = tensor([7])]; + tensor attention_6_update_end_0_values1_0 = const()[name = string("attention_6_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_6_update_end_0_values3_0 = const()[name = string("attention_6_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_6_update_end_0_axis_0 = const()[name = string("attention_6_update_end_0_axis_0"), val = int32(0)]; + bool attention_6_update_end_0_interleave_0 = const()[name = string("attention_6_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_6_update_end_0 = concat(axis = attention_6_update_end_0_axis_0, interleave = attention_6_update_end_0_interleave_0, values = (attention_6_update_end_0_values0_0, attention_6_update_end_0_values1_0, end_pos_0, attention_6_update_end_0_values3_0))[name = string("attention_6_update_end_0")]; + tensor attention_6_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_6_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_6_updated_key_cache_0 = slice_update(begin = attention_6_update_begin_0, end = attention_6_update_end_0, squeeze_mask = attention_6_updated_key_cache_0_squeeze_mask_0, update = attention_6_k_rope, x = coreml_update_state_10)[name = string("attention_6_updated_key_cache_0")]; + write_state(data = attention_6_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_12_write_state")]; + tensor coreml_update_state_12 = read_state(input = key_cache_state)[name = string("coreml_update_state_12")]; + tensor attention_6_key_cache_begin_0 = const()[name = string("attention_6_key_cache_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor attention_6_key_cache_end_0 = const()[name = string("attention_6_key_cache_end_0"), val = tensor([7, 2, 512, 64])]; + tensor attention_6_key_cache_squeeze_mask_0 = const()[name = string("attention_6_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_6_key_cache = slice_by_index(begin = attention_6_key_cache_begin_0, end = attention_6_key_cache_end_0, squeeze_mask = attention_6_key_cache_squeeze_mask_0, x = coreml_update_state_12)[name = string("attention_6_key_cache")]; + int32 attention_6_key_cache_head_axis_0 = const()[name = string("attention_6_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_6_key_cache_head_num_splits_0 = const()[name = string("attention_6_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_6_key_cache_head_0, tensor attention_6_key_cache_head_1 = split(axis = attention_6_key_cache_head_axis_0, num_splits = attention_6_key_cache_head_num_splits_0, x = attention_6_key_cache)[name = string("attention_6_key_cache_head")]; + tensor attention_6_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_6_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_6_updated_value_cache_0 = slice_update(begin = attention_6_update_begin_0, end = attention_6_update_end_0, squeeze_mask = attention_6_updated_value_cache_0_squeeze_mask_0, update = attention_6_split_qkv_heads_2, x = coreml_update_state_11)[name = string("attention_6_updated_value_cache_0")]; + write_state(data = attention_6_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_13_write_state")]; + tensor coreml_update_state_13 = read_state(input = value_cache_state)[name = string("coreml_update_state_13")]; + tensor attention_6_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_6_slice_current_layer_value_cache_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor attention_6_slice_current_layer_value_cache_end_0 = const()[name = string("attention_6_slice_current_layer_value_cache_end_0"), val = tensor([7, 2, 512, 64])]; + tensor attention_6_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_6_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_6_slice_current_layer_value_cache = slice_by_index(begin = attention_6_slice_current_layer_value_cache_begin_0, end = attention_6_slice_current_layer_value_cache_end_0, squeeze_mask = attention_6_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_13)[name = string("attention_6_slice_current_layer_value_cache")]; + int32 attention_6_slice_value_cache_heads_axis_0 = const()[name = string("attention_6_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_6_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_6_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_6_slice_value_cache_heads_0, tensor attention_6_slice_value_cache_heads_1 = split(axis = attention_6_slice_value_cache_heads_axis_0, num_splits = attention_6_slice_value_cache_heads_num_splits_0, x = attention_6_slice_current_layer_value_cache)[name = string("attention_6_slice_value_cache_heads")]; + bool attention_6_scores_0_transpose_y_0 = const()[name = string("attention_6_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_6_scores_0_transpose_x_0 = const()[name = string("attention_6_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_6_scores_0 = matmul(transpose_x = attention_6_scores_0_transpose_x_0, transpose_y = attention_6_scores_0_transpose_y_0, x = attention_6_key_cache_head_0, y = attention_6_q_splits_0)[name = string("attention_6_scores_0")]; + fp16 attention_6_scaled_scores_0_y_0 = const()[name = string("attention_6_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_6_scaled_scores_0 = mul(x = attention_6_scores_0, y = attention_6_scaled_scores_0_y_0)[name = string("attention_6_scaled_scores_0")]; + tensor attention_6_masked_scaled_scores_0 = add(x = attention_6_scaled_scores_0, y = transpose_0)[name = string("attention_6_masked_scaled_scores_0")]; + int32 softmax_12_axis_0 = const()[name = string("softmax_12_axis_0"), val = int32(-2)]; + tensor softmax_12 = softmax(axis = softmax_12_axis_0, x = attention_6_masked_scaled_scores_0)[name = string("softmax_12")]; + bool attention_6_attention_0_transpose_x_0 = const()[name = string("attention_6_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_6_attention_0_transpose_y_0 = const()[name = string("attention_6_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_6_attention_0 = matmul(transpose_x = attention_6_attention_0_transpose_x_0, transpose_y = attention_6_attention_0_transpose_y_0, x = softmax_12, y = attention_6_slice_value_cache_heads_0)[name = string("attention_6_attention_0")]; + bool attention_6_scores_1_transpose_y_0 = const()[name = string("attention_6_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_6_scores_1_transpose_x_0 = const()[name = string("attention_6_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_6_scores_1 = matmul(transpose_x = attention_6_scores_1_transpose_x_0, transpose_y = attention_6_scores_1_transpose_y_0, x = attention_6_key_cache_head_1, y = attention_6_q_splits_1)[name = string("attention_6_scores_1")]; + fp16 attention_6_scaled_scores_1_y_0 = const()[name = string("attention_6_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_6_scaled_scores_1 = mul(x = attention_6_scores_1, y = attention_6_scaled_scores_1_y_0)[name = string("attention_6_scaled_scores_1")]; + tensor attention_6_masked_scaled_scores_1 = add(x = attention_6_scaled_scores_1, y = transpose_0)[name = string("attention_6_masked_scaled_scores_1")]; + int32 softmax_13_axis_0 = const()[name = string("softmax_13_axis_0"), val = int32(-2)]; + tensor softmax_13 = softmax(axis = softmax_13_axis_0, x = attention_6_masked_scaled_scores_1)[name = string("softmax_13")]; + bool attention_6_attention_1_transpose_x_0 = const()[name = string("attention_6_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_6_attention_1_transpose_y_0 = const()[name = string("attention_6_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_6_attention_1 = matmul(transpose_x = attention_6_attention_1_transpose_x_0, transpose_y = attention_6_attention_1_transpose_y_0, x = softmax_13, y = attention_6_slice_value_cache_heads_1)[name = string("attention_6_attention_1")]; + int32 attention_6_concat_attention_all_heads_axis_0 = const()[name = string("attention_6_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_6_concat_attention_all_heads_interleave_0 = const()[name = string("attention_6_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_6_concat_attention_all_heads = concat(axis = attention_6_concat_attention_all_heads_axis_0, interleave = attention_6_concat_attention_all_heads_interleave_0, values = (attention_6_attention_0, attention_6_attention_1))[name = string("attention_6_concat_attention_all_heads")]; + tensor attention_6_channels_first_retransposed_perm_0 = const()[name = string("attention_6_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_6_reshaped_shape_0 = const()[name = string("attention_6_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_6_channels_first_retransposed = transpose(perm = attention_6_channels_first_retransposed_perm_0, x = attention_6_concat_attention_all_heads)[name = string("transpose_35")]; + tensor attention_6_reshaped = reshape(shape = attention_6_reshaped_shape_0, x = attention_6_channels_first_retransposed)[name = string("attention_6_reshaped")]; + tensor attention_6_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361667840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362270016))))[name = string("attention_6_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_26 = constexpr_blockwise_shift_scale(data = attention_6_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362298752))))[name = string("constexpr_blockwise_shift_scale_26")]; + tensor attention_6_outproj_strides_0 = const()[name = string("attention_6_outproj_strides_0"), val = tensor([1])]; + string attention_6_outproj_pad_type_0 = const()[name = string("attention_6_outproj_pad_type_0"), val = string("valid")]; + tensor attention_6_outproj_pad_0 = const()[name = string("attention_6_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_6_outproj_dilations_0 = const()[name = string("attention_6_outproj_dilations_0"), val = tensor([1])]; + int32 attention_6_outproj_groups_0 = const()[name = string("attention_6_outproj_groups_0"), val = int32(1)]; + tensor attention_6_outproj = conv(dilations = attention_6_outproj_dilations_0, groups = attention_6_outproj_groups_0, pad = attention_6_outproj_pad_0, pad_type = attention_6_outproj_pad_type_0, strides = attention_6_outproj_strides_0, weight = constexpr_blockwise_shift_scale_26, x = attention_6_reshaped)[name = string("attention_6_outproj")]; + tensor block_6_residual_1 = add(x = block_5_residual_2, y = attention_6_outproj)[name = string("block_6_residual_1")]; + tensor block_6_ffn_rmsnorm_abs = abs(x = block_6_residual_1)[name = string("block_6_ffn_rmsnorm_abs")]; + tensor block_6_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_6_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_6_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_6_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_6_ffn_rmsnorm_maxval = reduce_max(axes = block_6_ffn_rmsnorm_maxval_axes_0, keep_dims = block_6_ffn_rmsnorm_maxval_keep_dims_0, x = block_6_ffn_rmsnorm_abs)[name = string("block_6_ffn_rmsnorm_maxval")]; + fp16 block_6_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_6_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_6_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_6_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_6_ffn_rmsnorm_maxval_clipped = clip(alpha = block_6_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_6_ffn_rmsnorm_maxval_clipped_beta_0, x = block_6_ffn_rmsnorm_maxval)[name = string("block_6_ffn_rmsnorm_maxval_clipped")]; + tensor block_6_ffn_rmsnorm_scaled = real_div(x = block_6_residual_1, y = block_6_ffn_rmsnorm_maxval_clipped)[name = string("block_6_ffn_rmsnorm_scaled")]; + tensor block_6_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_6_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_6_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_6_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_6_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_6_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_6_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_6_ffn_rmsnorm_scaled)[name = string("block_6_ffn_rmsnorm_squared_sum")]; + fp16 block_6_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_6_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_6_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_6_ffn_rmsnorm_rsqrt_epsilon_0, x = block_6_ffn_rmsnorm_squared_sum)[name = string("block_6_ffn_rmsnorm_rsqrt")]; + fp16 block_6_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_6_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_6_ffn_rmsnorm_dim_scaled = mul(x = block_6_ffn_rmsnorm_scaled, y = block_6_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_6_ffn_rmsnorm_dim_scaled")]; + tensor block_6_ffn_rmsnorm_normalized = mul(x = block_6_ffn_rmsnorm_dim_scaled, y = block_6_ffn_rmsnorm_rsqrt)[name = string("block_6_ffn_rmsnorm_normalized")]; + tensor block_6_ffn_rmsnorm_y_0 = const()[name = string("block_6_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362300608)))]; + tensor block_6_ffn_rmsnorm = mul(x = block_6_ffn_rmsnorm_normalized, y = block_6_ffn_rmsnorm_y_0)[name = string("block_6_ffn_rmsnorm")]; + tensor block_6_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362302464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365571136))))[name = string("block_6_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_27 = constexpr_blockwise_shift_scale(data = block_6_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365726848))))[name = string("constexpr_blockwise_shift_scale_27")]; + tensor block_6_ffn_inproj_strides_0 = const()[name = string("block_6_ffn_inproj_strides_0"), val = tensor([1])]; + string block_6_ffn_inproj_pad_type_0 = const()[name = string("block_6_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_6_ffn_inproj_pad_0 = const()[name = string("block_6_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_6_ffn_inproj_dilations_0 = const()[name = string("block_6_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_6_ffn_inproj_groups_0 = const()[name = string("block_6_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_6_ffn_inproj = conv(dilations = block_6_ffn_inproj_dilations_0, groups = block_6_ffn_inproj_groups_0, pad = block_6_ffn_inproj_pad_0, pad_type = block_6_ffn_inproj_pad_type_0, strides = block_6_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_27, x = block_6_ffn_rmsnorm)[name = string("block_6_ffn_inproj")]; + tensor block_6_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365736640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369005312))))[name = string("block_6_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_28 = constexpr_blockwise_shift_scale(data = block_6_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369161024))))[name = string("constexpr_blockwise_shift_scale_28")]; + tensor block_6_ffn_g_strides_0 = const()[name = string("block_6_ffn_g_strides_0"), val = tensor([1])]; + string block_6_ffn_g_pad_type_0 = const()[name = string("block_6_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_6_ffn_g_pad_0 = const()[name = string("block_6_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_6_ffn_g_dilations_0 = const()[name = string("block_6_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_6_ffn_g_groups_0 = const()[name = string("block_6_ffn_g_groups_0"), val = int32(1)]; + tensor block_6_ffn_g = conv(dilations = block_6_ffn_g_dilations_0, groups = block_6_ffn_g_groups_0, pad = block_6_ffn_g_pad_0, pad_type = block_6_ffn_g_pad_type_0, strides = block_6_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_28, x = block_6_ffn_rmsnorm)[name = string("block_6_ffn_g")]; + tensor block_6_ffn_g_activation = silu(x = block_6_ffn_g)[name = string("block_6_ffn_g_activation")]; + tensor block_6_ffn_x_gated = mul(x = block_6_ffn_inproj, y = block_6_ffn_g_activation)[name = string("block_6_ffn_x_gated")]; + tensor block_6_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369170816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372439488))))[name = string("block_6_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_29 = constexpr_blockwise_shift_scale(data = block_6_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372468224))))[name = string("constexpr_blockwise_shift_scale_29")]; + tensor block_6_ffn_outproj_strides_0 = const()[name = string("block_6_ffn_outproj_strides_0"), val = tensor([1])]; + string block_6_ffn_outproj_pad_type_0 = const()[name = string("block_6_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_6_ffn_outproj_pad_0 = const()[name = string("block_6_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_6_ffn_outproj_dilations_0 = const()[name = string("block_6_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_6_ffn_outproj_groups_0 = const()[name = string("block_6_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_6_ffn_outproj = conv(dilations = block_6_ffn_outproj_dilations_0, groups = block_6_ffn_outproj_groups_0, pad = block_6_ffn_outproj_pad_0, pad_type = block_6_ffn_outproj_pad_type_0, strides = block_6_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_29, x = block_6_ffn_x_gated)[name = string("block_6_ffn_outproj")]; + tensor block_6_residual_2 = add(x = block_6_ffn_outproj, y = block_6_residual_1)[name = string("block_6_residual_2")]; + tensor block_7_attention_rmsnorm_abs = abs(x = block_6_residual_2)[name = string("block_7_attention_rmsnorm_abs")]; + tensor block_7_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_7_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_7_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_7_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_7_attention_rmsnorm_maxval = reduce_max(axes = block_7_attention_rmsnorm_maxval_axes_0, keep_dims = block_7_attention_rmsnorm_maxval_keep_dims_0, x = block_7_attention_rmsnorm_abs)[name = string("block_7_attention_rmsnorm_maxval")]; + fp16 block_7_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_7_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_7_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_7_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_7_attention_rmsnorm_maxval_clipped = clip(alpha = block_7_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_7_attention_rmsnorm_maxval_clipped_beta_0, x = block_7_attention_rmsnorm_maxval)[name = string("block_7_attention_rmsnorm_maxval_clipped")]; + tensor block_7_attention_rmsnorm_scaled = real_div(x = block_6_residual_2, y = block_7_attention_rmsnorm_maxval_clipped)[name = string("block_7_attention_rmsnorm_scaled")]; + tensor block_7_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_7_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_7_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_7_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_7_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_7_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_7_attention_rmsnorm_squared_sum_keep_dims_0, x = block_7_attention_rmsnorm_scaled)[name = string("block_7_attention_rmsnorm_squared_sum")]; + fp16 block_7_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_7_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_7_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_7_attention_rmsnorm_rsqrt_epsilon_0, x = block_7_attention_rmsnorm_squared_sum)[name = string("block_7_attention_rmsnorm_rsqrt")]; + fp16 block_7_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_7_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_7_attention_rmsnorm_dim_scaled = mul(x = block_7_attention_rmsnorm_scaled, y = block_7_attention_rmsnorm_dim_scaled_y_0)[name = string("block_7_attention_rmsnorm_dim_scaled")]; + tensor block_7_attention_rmsnorm_normalized = mul(x = block_7_attention_rmsnorm_dim_scaled, y = block_7_attention_rmsnorm_rsqrt)[name = string("block_7_attention_rmsnorm_normalized")]; + tensor block_7_attention_rmsnorm_y_0 = const()[name = string("block_7_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372470080)))]; + tensor block_7_attention_rmsnorm = mul(x = block_7_attention_rmsnorm_normalized, y = block_7_attention_rmsnorm_y_0)[name = string("block_7_attention_rmsnorm")]; + tensor attention_7_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372471936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373246144))))[name = string("attention_7_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_30 = constexpr_blockwise_shift_scale(data = attention_7_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373283072))))[name = string("constexpr_blockwise_shift_scale_30")]; + tensor attention_7_qkvproj_bias_0 = const()[name = string("attention_7_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373285440)))]; + tensor attention_7_qkvproj_strides_0 = const()[name = string("attention_7_qkvproj_strides_0"), val = tensor([1])]; + string attention_7_qkvproj_pad_type_0 = const()[name = string("attention_7_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_7_qkvproj_pad_0 = const()[name = string("attention_7_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_7_qkvproj_dilations_0 = const()[name = string("attention_7_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_7_qkvproj_groups_0 = const()[name = string("attention_7_qkvproj_groups_0"), val = int32(1)]; + tensor attention_7_qkvproj = conv(bias = attention_7_qkvproj_bias_0, dilations = attention_7_qkvproj_dilations_0, groups = attention_7_qkvproj_groups_0, pad = attention_7_qkvproj_pad_0, pad_type = attention_7_qkvproj_pad_type_0, strides = attention_7_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_30, x = block_7_attention_rmsnorm)[name = string("attention_7_qkvproj")]; + tensor attention_7_head_reshape_shape_0 = const()[name = string("attention_7_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_7_head_reshape = reshape(shape = attention_7_head_reshape_shape_0, x = attention_7_qkvproj)[name = string("attention_7_head_reshape")]; + tensor attention_7_head_transpose_perm_0 = const()[name = string("attention_7_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_7_split_qkv_heads_axis_0 = const()[name = string("attention_7_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_7_split_qkv_heads_split_sizes_0 = const()[name = string("attention_7_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_7_head_transpose = transpose(perm = attention_7_head_transpose_perm_0, x = attention_7_head_reshape)[name = string("transpose_34")]; + tensor attention_7_split_qkv_heads_0, tensor attention_7_split_qkv_heads_1, tensor attention_7_split_qkv_heads_2 = split(axis = attention_7_split_qkv_heads_axis_0, split_sizes = attention_7_split_qkv_heads_split_sizes_0, x = attention_7_head_transpose)[name = string("attention_7_split_qkv_heads")]; + tensor attention_7_q_rope_lhs_mult = mul(x = attention_7_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_7_q_rope_lhs_mult")]; + int32 attention_7_q_rotate_half_split_num_splits_0 = const()[name = string("attention_7_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_7_q_rotate_half_split_axis_0 = const()[name = string("attention_7_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_7_q_rotate_half_split_0, tensor attention_7_q_rotate_half_split_1 = split(axis = attention_7_q_rotate_half_split_axis_0, num_splits = attention_7_q_rotate_half_split_num_splits_0, x = attention_7_split_qkv_heads_0)[name = string("attention_7_q_rotate_half_split")]; + fp16 attention_7_q_rotate_half_neg_y_0 = const()[name = string("attention_7_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_7_q_rotate_half_neg = mul(x = attention_7_q_rotate_half_split_1, y = attention_7_q_rotate_half_neg_y_0)[name = string("attention_7_q_rotate_half_neg")]; + int32 attention_7_q_rotate_half_concat_axis_0 = const()[name = string("attention_7_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_7_q_rotate_half_concat_interleave_0 = const()[name = string("attention_7_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_7_q_rotate_half_concat = concat(axis = attention_7_q_rotate_half_concat_axis_0, interleave = attention_7_q_rotate_half_concat_interleave_0, values = (attention_7_q_rotate_half_neg, attention_7_q_rotate_half_split_0))[name = string("attention_7_q_rotate_half_concat")]; + tensor attention_7_q_rope_rhs_mult = mul(x = attention_7_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_7_q_rope_rhs_mult")]; + tensor attention_7_q_rope = add(x = attention_7_q_rope_lhs_mult, y = attention_7_q_rope_rhs_mult)[name = string("attention_7_q_rope")]; + tensor attention_7_k_rope_lhs_mult = mul(x = attention_7_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_7_k_rope_lhs_mult")]; + int32 attention_7_k_rotate_half_split_num_splits_0 = const()[name = string("attention_7_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_7_k_rotate_half_split_axis_0 = const()[name = string("attention_7_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_7_k_rotate_half_split_0, tensor attention_7_k_rotate_half_split_1 = split(axis = attention_7_k_rotate_half_split_axis_0, num_splits = attention_7_k_rotate_half_split_num_splits_0, x = attention_7_split_qkv_heads_1)[name = string("attention_7_k_rotate_half_split")]; + fp16 attention_7_k_rotate_half_neg_y_0 = const()[name = string("attention_7_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_7_k_rotate_half_neg = mul(x = attention_7_k_rotate_half_split_1, y = attention_7_k_rotate_half_neg_y_0)[name = string("attention_7_k_rotate_half_neg")]; + int32 attention_7_k_rotate_half_concat_axis_0 = const()[name = string("attention_7_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_7_k_rotate_half_concat_interleave_0 = const()[name = string("attention_7_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_7_k_rotate_half_concat = concat(axis = attention_7_k_rotate_half_concat_axis_0, interleave = attention_7_k_rotate_half_concat_interleave_0, values = (attention_7_k_rotate_half_neg, attention_7_k_rotate_half_split_0))[name = string("attention_7_k_rotate_half_concat")]; + tensor attention_7_k_rope_rhs_mult = mul(x = attention_7_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_7_k_rope_rhs_mult")]; + tensor attention_7_k_rope = add(x = attention_7_k_rope_lhs_mult, y = attention_7_k_rope_rhs_mult)[name = string("attention_7_k_rope")]; + int32 attention_7_q_splits_axis_0 = const()[name = string("attention_7_q_splits_axis_0"), val = int32(1)]; + int32 attention_7_q_splits_num_splits_0 = const()[name = string("attention_7_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_7_q_splits_0, tensor attention_7_q_splits_1 = split(axis = attention_7_q_splits_axis_0, num_splits = attention_7_q_splits_num_splits_0, x = attention_7_q_rope)[name = string("attention_7_q_splits")]; + tensor attention_7_update_begin_0_values0_0 = const()[name = string("attention_7_update_begin_0_values0_0"), val = tensor([7])]; + tensor attention_7_update_begin_0_values1_0 = const()[name = string("attention_7_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_7_update_begin_0_values3_0 = const()[name = string("attention_7_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_7_update_begin_0_axis_0 = const()[name = string("attention_7_update_begin_0_axis_0"), val = int32(0)]; + bool attention_7_update_begin_0_interleave_0 = const()[name = string("attention_7_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_7_update_begin_0 = concat(axis = attention_7_update_begin_0_axis_0, interleave = attention_7_update_begin_0_interleave_0, values = (attention_7_update_begin_0_values0_0, attention_7_update_begin_0_values1_0, query_pos1, attention_7_update_begin_0_values3_0))[name = string("attention_7_update_begin_0")]; + tensor attention_7_update_end_0_values0_0 = const()[name = string("attention_7_update_end_0_values0_0"), val = tensor([8])]; + tensor attention_7_update_end_0_values1_0 = const()[name = string("attention_7_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_7_update_end_0_values3_0 = const()[name = string("attention_7_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_7_update_end_0_axis_0 = const()[name = string("attention_7_update_end_0_axis_0"), val = int32(0)]; + bool attention_7_update_end_0_interleave_0 = const()[name = string("attention_7_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_7_update_end_0 = concat(axis = attention_7_update_end_0_axis_0, interleave = attention_7_update_end_0_interleave_0, values = (attention_7_update_end_0_values0_0, attention_7_update_end_0_values1_0, end_pos_0, attention_7_update_end_0_values3_0))[name = string("attention_7_update_end_0")]; + tensor attention_7_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_7_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_7_updated_key_cache_0 = slice_update(begin = attention_7_update_begin_0, end = attention_7_update_end_0, squeeze_mask = attention_7_updated_key_cache_0_squeeze_mask_0, update = attention_7_k_rope, x = coreml_update_state_12)[name = string("attention_7_updated_key_cache_0")]; + write_state(data = attention_7_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_14_write_state")]; + tensor coreml_update_state_14 = read_state(input = key_cache_state)[name = string("coreml_update_state_14")]; + tensor attention_7_key_cache_begin_0 = const()[name = string("attention_7_key_cache_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor attention_7_key_cache_end_0 = const()[name = string("attention_7_key_cache_end_0"), val = tensor([8, 2, 512, 64])]; + tensor attention_7_key_cache_squeeze_mask_0 = const()[name = string("attention_7_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_7_key_cache = slice_by_index(begin = attention_7_key_cache_begin_0, end = attention_7_key_cache_end_0, squeeze_mask = attention_7_key_cache_squeeze_mask_0, x = coreml_update_state_14)[name = string("attention_7_key_cache")]; + int32 attention_7_key_cache_head_axis_0 = const()[name = string("attention_7_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_7_key_cache_head_num_splits_0 = const()[name = string("attention_7_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_7_key_cache_head_0, tensor attention_7_key_cache_head_1 = split(axis = attention_7_key_cache_head_axis_0, num_splits = attention_7_key_cache_head_num_splits_0, x = attention_7_key_cache)[name = string("attention_7_key_cache_head")]; + tensor attention_7_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_7_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_7_updated_value_cache_0 = slice_update(begin = attention_7_update_begin_0, end = attention_7_update_end_0, squeeze_mask = attention_7_updated_value_cache_0_squeeze_mask_0, update = attention_7_split_qkv_heads_2, x = coreml_update_state_13)[name = string("attention_7_updated_value_cache_0")]; + write_state(data = attention_7_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_15_write_state")]; + tensor coreml_update_state_15 = read_state(input = value_cache_state)[name = string("coreml_update_state_15")]; + tensor attention_7_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_7_slice_current_layer_value_cache_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor attention_7_slice_current_layer_value_cache_end_0 = const()[name = string("attention_7_slice_current_layer_value_cache_end_0"), val = tensor([8, 2, 512, 64])]; + tensor attention_7_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_7_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_7_slice_current_layer_value_cache = slice_by_index(begin = attention_7_slice_current_layer_value_cache_begin_0, end = attention_7_slice_current_layer_value_cache_end_0, squeeze_mask = attention_7_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_15)[name = string("attention_7_slice_current_layer_value_cache")]; + int32 attention_7_slice_value_cache_heads_axis_0 = const()[name = string("attention_7_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_7_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_7_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_7_slice_value_cache_heads_0, tensor attention_7_slice_value_cache_heads_1 = split(axis = attention_7_slice_value_cache_heads_axis_0, num_splits = attention_7_slice_value_cache_heads_num_splits_0, x = attention_7_slice_current_layer_value_cache)[name = string("attention_7_slice_value_cache_heads")]; + bool attention_7_scores_0_transpose_y_0 = const()[name = string("attention_7_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_7_scores_0_transpose_x_0 = const()[name = string("attention_7_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_7_scores_0 = matmul(transpose_x = attention_7_scores_0_transpose_x_0, transpose_y = attention_7_scores_0_transpose_y_0, x = attention_7_key_cache_head_0, y = attention_7_q_splits_0)[name = string("attention_7_scores_0")]; + fp16 attention_7_scaled_scores_0_y_0 = const()[name = string("attention_7_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_7_scaled_scores_0 = mul(x = attention_7_scores_0, y = attention_7_scaled_scores_0_y_0)[name = string("attention_7_scaled_scores_0")]; + tensor attention_7_masked_scaled_scores_0 = add(x = attention_7_scaled_scores_0, y = transpose_0)[name = string("attention_7_masked_scaled_scores_0")]; + int32 softmax_14_axis_0 = const()[name = string("softmax_14_axis_0"), val = int32(-2)]; + tensor softmax_14 = softmax(axis = softmax_14_axis_0, x = attention_7_masked_scaled_scores_0)[name = string("softmax_14")]; + bool attention_7_attention_0_transpose_x_0 = const()[name = string("attention_7_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_7_attention_0_transpose_y_0 = const()[name = string("attention_7_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_7_attention_0 = matmul(transpose_x = attention_7_attention_0_transpose_x_0, transpose_y = attention_7_attention_0_transpose_y_0, x = softmax_14, y = attention_7_slice_value_cache_heads_0)[name = string("attention_7_attention_0")]; + bool attention_7_scores_1_transpose_y_0 = const()[name = string("attention_7_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_7_scores_1_transpose_x_0 = const()[name = string("attention_7_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_7_scores_1 = matmul(transpose_x = attention_7_scores_1_transpose_x_0, transpose_y = attention_7_scores_1_transpose_y_0, x = attention_7_key_cache_head_1, y = attention_7_q_splits_1)[name = string("attention_7_scores_1")]; + fp16 attention_7_scaled_scores_1_y_0 = const()[name = string("attention_7_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_7_scaled_scores_1 = mul(x = attention_7_scores_1, y = attention_7_scaled_scores_1_y_0)[name = string("attention_7_scaled_scores_1")]; + tensor attention_7_masked_scaled_scores_1 = add(x = attention_7_scaled_scores_1, y = transpose_0)[name = string("attention_7_masked_scaled_scores_1")]; + int32 softmax_15_axis_0 = const()[name = string("softmax_15_axis_0"), val = int32(-2)]; + tensor softmax_15 = softmax(axis = softmax_15_axis_0, x = attention_7_masked_scaled_scores_1)[name = string("softmax_15")]; + bool attention_7_attention_1_transpose_x_0 = const()[name = string("attention_7_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_7_attention_1_transpose_y_0 = const()[name = string("attention_7_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_7_attention_1 = matmul(transpose_x = attention_7_attention_1_transpose_x_0, transpose_y = attention_7_attention_1_transpose_y_0, x = softmax_15, y = attention_7_slice_value_cache_heads_1)[name = string("attention_7_attention_1")]; + int32 attention_7_concat_attention_all_heads_axis_0 = const()[name = string("attention_7_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_7_concat_attention_all_heads_interleave_0 = const()[name = string("attention_7_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_7_concat_attention_all_heads = concat(axis = attention_7_concat_attention_all_heads_axis_0, interleave = attention_7_concat_attention_all_heads_interleave_0, values = (attention_7_attention_0, attention_7_attention_1))[name = string("attention_7_concat_attention_all_heads")]; + tensor attention_7_channels_first_retransposed_perm_0 = const()[name = string("attention_7_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_7_reshaped_shape_0 = const()[name = string("attention_7_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_7_channels_first_retransposed = transpose(perm = attention_7_channels_first_retransposed_perm_0, x = attention_7_concat_attention_all_heads)[name = string("transpose_33")]; + tensor attention_7_reshaped = reshape(shape = attention_7_reshaped_shape_0, x = attention_7_channels_first_retransposed)[name = string("attention_7_reshaped")]; + tensor attention_7_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373287808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373889984))))[name = string("attention_7_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_31 = constexpr_blockwise_shift_scale(data = attention_7_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373918720))))[name = string("constexpr_blockwise_shift_scale_31")]; + tensor attention_7_outproj_strides_0 = const()[name = string("attention_7_outproj_strides_0"), val = tensor([1])]; + string attention_7_outproj_pad_type_0 = const()[name = string("attention_7_outproj_pad_type_0"), val = string("valid")]; + tensor attention_7_outproj_pad_0 = const()[name = string("attention_7_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_7_outproj_dilations_0 = const()[name = string("attention_7_outproj_dilations_0"), val = tensor([1])]; + int32 attention_7_outproj_groups_0 = const()[name = string("attention_7_outproj_groups_0"), val = int32(1)]; + tensor attention_7_outproj = conv(dilations = attention_7_outproj_dilations_0, groups = attention_7_outproj_groups_0, pad = attention_7_outproj_pad_0, pad_type = attention_7_outproj_pad_type_0, strides = attention_7_outproj_strides_0, weight = constexpr_blockwise_shift_scale_31, x = attention_7_reshaped)[name = string("attention_7_outproj")]; + tensor block_7_residual_1 = add(x = block_6_residual_2, y = attention_7_outproj)[name = string("block_7_residual_1")]; + tensor block_7_ffn_rmsnorm_abs = abs(x = block_7_residual_1)[name = string("block_7_ffn_rmsnorm_abs")]; + tensor block_7_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_7_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_7_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_7_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_7_ffn_rmsnorm_maxval = reduce_max(axes = block_7_ffn_rmsnorm_maxval_axes_0, keep_dims = block_7_ffn_rmsnorm_maxval_keep_dims_0, x = block_7_ffn_rmsnorm_abs)[name = string("block_7_ffn_rmsnorm_maxval")]; + fp16 block_7_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_7_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_7_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_7_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_7_ffn_rmsnorm_maxval_clipped = clip(alpha = block_7_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_7_ffn_rmsnorm_maxval_clipped_beta_0, x = block_7_ffn_rmsnorm_maxval)[name = string("block_7_ffn_rmsnorm_maxval_clipped")]; + tensor block_7_ffn_rmsnorm_scaled = real_div(x = block_7_residual_1, y = block_7_ffn_rmsnorm_maxval_clipped)[name = string("block_7_ffn_rmsnorm_scaled")]; + tensor block_7_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_7_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_7_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_7_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_7_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_7_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_7_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_7_ffn_rmsnorm_scaled)[name = string("block_7_ffn_rmsnorm_squared_sum")]; + fp16 block_7_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_7_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_7_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_7_ffn_rmsnorm_rsqrt_epsilon_0, x = block_7_ffn_rmsnorm_squared_sum)[name = string("block_7_ffn_rmsnorm_rsqrt")]; + fp16 block_7_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_7_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_7_ffn_rmsnorm_dim_scaled = mul(x = block_7_ffn_rmsnorm_scaled, y = block_7_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_7_ffn_rmsnorm_dim_scaled")]; + tensor block_7_ffn_rmsnorm_normalized = mul(x = block_7_ffn_rmsnorm_dim_scaled, y = block_7_ffn_rmsnorm_rsqrt)[name = string("block_7_ffn_rmsnorm_normalized")]; + tensor block_7_ffn_rmsnorm_y_0 = const()[name = string("block_7_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373920576)))]; + tensor block_7_ffn_rmsnorm = mul(x = block_7_ffn_rmsnorm_normalized, y = block_7_ffn_rmsnorm_y_0)[name = string("block_7_ffn_rmsnorm")]; + tensor block_7_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373922432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377191104))))[name = string("block_7_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_32 = constexpr_blockwise_shift_scale(data = block_7_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377346816))))[name = string("constexpr_blockwise_shift_scale_32")]; + tensor block_7_ffn_inproj_strides_0 = const()[name = string("block_7_ffn_inproj_strides_0"), val = tensor([1])]; + string block_7_ffn_inproj_pad_type_0 = const()[name = string("block_7_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_7_ffn_inproj_pad_0 = const()[name = string("block_7_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_7_ffn_inproj_dilations_0 = const()[name = string("block_7_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_7_ffn_inproj_groups_0 = const()[name = string("block_7_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_7_ffn_inproj = conv(dilations = block_7_ffn_inproj_dilations_0, groups = block_7_ffn_inproj_groups_0, pad = block_7_ffn_inproj_pad_0, pad_type = block_7_ffn_inproj_pad_type_0, strides = block_7_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_32, x = block_7_ffn_rmsnorm)[name = string("block_7_ffn_inproj")]; + tensor block_7_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377356608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380625280))))[name = string("block_7_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_33 = constexpr_blockwise_shift_scale(data = block_7_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380780992))))[name = string("constexpr_blockwise_shift_scale_33")]; + tensor block_7_ffn_g_strides_0 = const()[name = string("block_7_ffn_g_strides_0"), val = tensor([1])]; + string block_7_ffn_g_pad_type_0 = const()[name = string("block_7_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_7_ffn_g_pad_0 = const()[name = string("block_7_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_7_ffn_g_dilations_0 = const()[name = string("block_7_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_7_ffn_g_groups_0 = const()[name = string("block_7_ffn_g_groups_0"), val = int32(1)]; + tensor block_7_ffn_g = conv(dilations = block_7_ffn_g_dilations_0, groups = block_7_ffn_g_groups_0, pad = block_7_ffn_g_pad_0, pad_type = block_7_ffn_g_pad_type_0, strides = block_7_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_33, x = block_7_ffn_rmsnorm)[name = string("block_7_ffn_g")]; + tensor block_7_ffn_g_activation = silu(x = block_7_ffn_g)[name = string("block_7_ffn_g_activation")]; + tensor block_7_ffn_x_gated = mul(x = block_7_ffn_inproj, y = block_7_ffn_g_activation)[name = string("block_7_ffn_x_gated")]; + tensor block_7_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380790784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384059456))))[name = string("block_7_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_34 = constexpr_blockwise_shift_scale(data = block_7_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384088192))))[name = string("constexpr_blockwise_shift_scale_34")]; + tensor block_7_ffn_outproj_strides_0 = const()[name = string("block_7_ffn_outproj_strides_0"), val = tensor([1])]; + string block_7_ffn_outproj_pad_type_0 = const()[name = string("block_7_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_7_ffn_outproj_pad_0 = const()[name = string("block_7_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_7_ffn_outproj_dilations_0 = const()[name = string("block_7_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_7_ffn_outproj_groups_0 = const()[name = string("block_7_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_7_ffn_outproj = conv(dilations = block_7_ffn_outproj_dilations_0, groups = block_7_ffn_outproj_groups_0, pad = block_7_ffn_outproj_pad_0, pad_type = block_7_ffn_outproj_pad_type_0, strides = block_7_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_34, x = block_7_ffn_x_gated)[name = string("block_7_ffn_outproj")]; + tensor block_7_residual_2 = add(x = block_7_ffn_outproj, y = block_7_residual_1)[name = string("block_7_residual_2")]; + tensor block_8_attention_rmsnorm_abs = abs(x = block_7_residual_2)[name = string("block_8_attention_rmsnorm_abs")]; + tensor block_8_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_8_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_8_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_8_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_8_attention_rmsnorm_maxval = reduce_max(axes = block_8_attention_rmsnorm_maxval_axes_0, keep_dims = block_8_attention_rmsnorm_maxval_keep_dims_0, x = block_8_attention_rmsnorm_abs)[name = string("block_8_attention_rmsnorm_maxval")]; + fp16 block_8_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_8_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_8_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_8_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_8_attention_rmsnorm_maxval_clipped = clip(alpha = block_8_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_8_attention_rmsnorm_maxval_clipped_beta_0, x = block_8_attention_rmsnorm_maxval)[name = string("block_8_attention_rmsnorm_maxval_clipped")]; + tensor block_8_attention_rmsnorm_scaled = real_div(x = block_7_residual_2, y = block_8_attention_rmsnorm_maxval_clipped)[name = string("block_8_attention_rmsnorm_scaled")]; + tensor block_8_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_8_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_8_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_8_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_8_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_8_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_8_attention_rmsnorm_squared_sum_keep_dims_0, x = block_8_attention_rmsnorm_scaled)[name = string("block_8_attention_rmsnorm_squared_sum")]; + fp16 block_8_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_8_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_8_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_8_attention_rmsnorm_rsqrt_epsilon_0, x = block_8_attention_rmsnorm_squared_sum)[name = string("block_8_attention_rmsnorm_rsqrt")]; + fp16 block_8_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_8_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_8_attention_rmsnorm_dim_scaled = mul(x = block_8_attention_rmsnorm_scaled, y = block_8_attention_rmsnorm_dim_scaled_y_0)[name = string("block_8_attention_rmsnorm_dim_scaled")]; + tensor block_8_attention_rmsnorm_normalized = mul(x = block_8_attention_rmsnorm_dim_scaled, y = block_8_attention_rmsnorm_rsqrt)[name = string("block_8_attention_rmsnorm_normalized")]; + tensor block_8_attention_rmsnorm_y_0 = const()[name = string("block_8_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384090048)))]; + tensor block_8_attention_rmsnorm = mul(x = block_8_attention_rmsnorm_normalized, y = block_8_attention_rmsnorm_y_0)[name = string("block_8_attention_rmsnorm")]; + tensor attention_8_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384091904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384866112))))[name = string("attention_8_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_35 = constexpr_blockwise_shift_scale(data = attention_8_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384903040))))[name = string("constexpr_blockwise_shift_scale_35")]; + tensor attention_8_qkvproj_bias_0 = const()[name = string("attention_8_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384905408)))]; + tensor attention_8_qkvproj_strides_0 = const()[name = string("attention_8_qkvproj_strides_0"), val = tensor([1])]; + string attention_8_qkvproj_pad_type_0 = const()[name = string("attention_8_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_8_qkvproj_pad_0 = const()[name = string("attention_8_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_8_qkvproj_dilations_0 = const()[name = string("attention_8_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_8_qkvproj_groups_0 = const()[name = string("attention_8_qkvproj_groups_0"), val = int32(1)]; + tensor attention_8_qkvproj = conv(bias = attention_8_qkvproj_bias_0, dilations = attention_8_qkvproj_dilations_0, groups = attention_8_qkvproj_groups_0, pad = attention_8_qkvproj_pad_0, pad_type = attention_8_qkvproj_pad_type_0, strides = attention_8_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_35, x = block_8_attention_rmsnorm)[name = string("attention_8_qkvproj")]; + tensor attention_8_head_reshape_shape_0 = const()[name = string("attention_8_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_8_head_reshape = reshape(shape = attention_8_head_reshape_shape_0, x = attention_8_qkvproj)[name = string("attention_8_head_reshape")]; + tensor attention_8_head_transpose_perm_0 = const()[name = string("attention_8_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_8_split_qkv_heads_axis_0 = const()[name = string("attention_8_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_8_split_qkv_heads_split_sizes_0 = const()[name = string("attention_8_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_8_head_transpose = transpose(perm = attention_8_head_transpose_perm_0, x = attention_8_head_reshape)[name = string("transpose_32")]; + tensor attention_8_split_qkv_heads_0, tensor attention_8_split_qkv_heads_1, tensor attention_8_split_qkv_heads_2 = split(axis = attention_8_split_qkv_heads_axis_0, split_sizes = attention_8_split_qkv_heads_split_sizes_0, x = attention_8_head_transpose)[name = string("attention_8_split_qkv_heads")]; + tensor attention_8_q_rope_lhs_mult = mul(x = attention_8_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_8_q_rope_lhs_mult")]; + int32 attention_8_q_rotate_half_split_num_splits_0 = const()[name = string("attention_8_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_8_q_rotate_half_split_axis_0 = const()[name = string("attention_8_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_8_q_rotate_half_split_0, tensor attention_8_q_rotate_half_split_1 = split(axis = attention_8_q_rotate_half_split_axis_0, num_splits = attention_8_q_rotate_half_split_num_splits_0, x = attention_8_split_qkv_heads_0)[name = string("attention_8_q_rotate_half_split")]; + fp16 attention_8_q_rotate_half_neg_y_0 = const()[name = string("attention_8_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_8_q_rotate_half_neg = mul(x = attention_8_q_rotate_half_split_1, y = attention_8_q_rotate_half_neg_y_0)[name = string("attention_8_q_rotate_half_neg")]; + int32 attention_8_q_rotate_half_concat_axis_0 = const()[name = string("attention_8_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_8_q_rotate_half_concat_interleave_0 = const()[name = string("attention_8_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_8_q_rotate_half_concat = concat(axis = attention_8_q_rotate_half_concat_axis_0, interleave = attention_8_q_rotate_half_concat_interleave_0, values = (attention_8_q_rotate_half_neg, attention_8_q_rotate_half_split_0))[name = string("attention_8_q_rotate_half_concat")]; + tensor attention_8_q_rope_rhs_mult = mul(x = attention_8_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_8_q_rope_rhs_mult")]; + tensor attention_8_q_rope = add(x = attention_8_q_rope_lhs_mult, y = attention_8_q_rope_rhs_mult)[name = string("attention_8_q_rope")]; + tensor attention_8_k_rope_lhs_mult = mul(x = attention_8_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_8_k_rope_lhs_mult")]; + int32 attention_8_k_rotate_half_split_num_splits_0 = const()[name = string("attention_8_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_8_k_rotate_half_split_axis_0 = const()[name = string("attention_8_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_8_k_rotate_half_split_0, tensor attention_8_k_rotate_half_split_1 = split(axis = attention_8_k_rotate_half_split_axis_0, num_splits = attention_8_k_rotate_half_split_num_splits_0, x = attention_8_split_qkv_heads_1)[name = string("attention_8_k_rotate_half_split")]; + fp16 attention_8_k_rotate_half_neg_y_0 = const()[name = string("attention_8_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_8_k_rotate_half_neg = mul(x = attention_8_k_rotate_half_split_1, y = attention_8_k_rotate_half_neg_y_0)[name = string("attention_8_k_rotate_half_neg")]; + int32 attention_8_k_rotate_half_concat_axis_0 = const()[name = string("attention_8_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_8_k_rotate_half_concat_interleave_0 = const()[name = string("attention_8_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_8_k_rotate_half_concat = concat(axis = attention_8_k_rotate_half_concat_axis_0, interleave = attention_8_k_rotate_half_concat_interleave_0, values = (attention_8_k_rotate_half_neg, attention_8_k_rotate_half_split_0))[name = string("attention_8_k_rotate_half_concat")]; + tensor attention_8_k_rope_rhs_mult = mul(x = attention_8_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_8_k_rope_rhs_mult")]; + tensor attention_8_k_rope = add(x = attention_8_k_rope_lhs_mult, y = attention_8_k_rope_rhs_mult)[name = string("attention_8_k_rope")]; + int32 attention_8_q_splits_axis_0 = const()[name = string("attention_8_q_splits_axis_0"), val = int32(1)]; + int32 attention_8_q_splits_num_splits_0 = const()[name = string("attention_8_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_8_q_splits_0, tensor attention_8_q_splits_1 = split(axis = attention_8_q_splits_axis_0, num_splits = attention_8_q_splits_num_splits_0, x = attention_8_q_rope)[name = string("attention_8_q_splits")]; + tensor attention_8_update_begin_0_values0_0 = const()[name = string("attention_8_update_begin_0_values0_0"), val = tensor([8])]; + tensor attention_8_update_begin_0_values1_0 = const()[name = string("attention_8_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_8_update_begin_0_values3_0 = const()[name = string("attention_8_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_8_update_begin_0_axis_0 = const()[name = string("attention_8_update_begin_0_axis_0"), val = int32(0)]; + bool attention_8_update_begin_0_interleave_0 = const()[name = string("attention_8_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_8_update_begin_0 = concat(axis = attention_8_update_begin_0_axis_0, interleave = attention_8_update_begin_0_interleave_0, values = (attention_8_update_begin_0_values0_0, attention_8_update_begin_0_values1_0, query_pos1, attention_8_update_begin_0_values3_0))[name = string("attention_8_update_begin_0")]; + tensor attention_8_update_end_0_values0_0 = const()[name = string("attention_8_update_end_0_values0_0"), val = tensor([9])]; + tensor attention_8_update_end_0_values1_0 = const()[name = string("attention_8_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_8_update_end_0_values3_0 = const()[name = string("attention_8_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_8_update_end_0_axis_0 = const()[name = string("attention_8_update_end_0_axis_0"), val = int32(0)]; + bool attention_8_update_end_0_interleave_0 = const()[name = string("attention_8_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_8_update_end_0 = concat(axis = attention_8_update_end_0_axis_0, interleave = attention_8_update_end_0_interleave_0, values = (attention_8_update_end_0_values0_0, attention_8_update_end_0_values1_0, end_pos_0, attention_8_update_end_0_values3_0))[name = string("attention_8_update_end_0")]; + tensor attention_8_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_8_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_8_updated_key_cache_0 = slice_update(begin = attention_8_update_begin_0, end = attention_8_update_end_0, squeeze_mask = attention_8_updated_key_cache_0_squeeze_mask_0, update = attention_8_k_rope, x = coreml_update_state_14)[name = string("attention_8_updated_key_cache_0")]; + write_state(data = attention_8_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_16_write_state")]; + tensor coreml_update_state_16 = read_state(input = key_cache_state)[name = string("coreml_update_state_16")]; + tensor attention_8_key_cache_begin_0 = const()[name = string("attention_8_key_cache_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor attention_8_key_cache_end_0 = const()[name = string("attention_8_key_cache_end_0"), val = tensor([9, 2, 512, 64])]; + tensor attention_8_key_cache_squeeze_mask_0 = const()[name = string("attention_8_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_8_key_cache = slice_by_index(begin = attention_8_key_cache_begin_0, end = attention_8_key_cache_end_0, squeeze_mask = attention_8_key_cache_squeeze_mask_0, x = coreml_update_state_16)[name = string("attention_8_key_cache")]; + int32 attention_8_key_cache_head_axis_0 = const()[name = string("attention_8_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_8_key_cache_head_num_splits_0 = const()[name = string("attention_8_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_8_key_cache_head_0, tensor attention_8_key_cache_head_1 = split(axis = attention_8_key_cache_head_axis_0, num_splits = attention_8_key_cache_head_num_splits_0, x = attention_8_key_cache)[name = string("attention_8_key_cache_head")]; + tensor attention_8_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_8_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_8_updated_value_cache_0 = slice_update(begin = attention_8_update_begin_0, end = attention_8_update_end_0, squeeze_mask = attention_8_updated_value_cache_0_squeeze_mask_0, update = attention_8_split_qkv_heads_2, x = coreml_update_state_15)[name = string("attention_8_updated_value_cache_0")]; + write_state(data = attention_8_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_17_write_state")]; + tensor coreml_update_state_17 = read_state(input = value_cache_state)[name = string("coreml_update_state_17")]; + tensor attention_8_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_8_slice_current_layer_value_cache_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor attention_8_slice_current_layer_value_cache_end_0 = const()[name = string("attention_8_slice_current_layer_value_cache_end_0"), val = tensor([9, 2, 512, 64])]; + tensor attention_8_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_8_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_8_slice_current_layer_value_cache = slice_by_index(begin = attention_8_slice_current_layer_value_cache_begin_0, end = attention_8_slice_current_layer_value_cache_end_0, squeeze_mask = attention_8_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_17)[name = string("attention_8_slice_current_layer_value_cache")]; + int32 attention_8_slice_value_cache_heads_axis_0 = const()[name = string("attention_8_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_8_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_8_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_8_slice_value_cache_heads_0, tensor attention_8_slice_value_cache_heads_1 = split(axis = attention_8_slice_value_cache_heads_axis_0, num_splits = attention_8_slice_value_cache_heads_num_splits_0, x = attention_8_slice_current_layer_value_cache)[name = string("attention_8_slice_value_cache_heads")]; + bool attention_8_scores_0_transpose_y_0 = const()[name = string("attention_8_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_8_scores_0_transpose_x_0 = const()[name = string("attention_8_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_8_scores_0 = matmul(transpose_x = attention_8_scores_0_transpose_x_0, transpose_y = attention_8_scores_0_transpose_y_0, x = attention_8_key_cache_head_0, y = attention_8_q_splits_0)[name = string("attention_8_scores_0")]; + fp16 attention_8_scaled_scores_0_y_0 = const()[name = string("attention_8_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_8_scaled_scores_0 = mul(x = attention_8_scores_0, y = attention_8_scaled_scores_0_y_0)[name = string("attention_8_scaled_scores_0")]; + tensor attention_8_masked_scaled_scores_0 = add(x = attention_8_scaled_scores_0, y = transpose_0)[name = string("attention_8_masked_scaled_scores_0")]; + int32 softmax_16_axis_0 = const()[name = string("softmax_16_axis_0"), val = int32(-2)]; + tensor softmax_16 = softmax(axis = softmax_16_axis_0, x = attention_8_masked_scaled_scores_0)[name = string("softmax_16")]; + bool attention_8_attention_0_transpose_x_0 = const()[name = string("attention_8_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_8_attention_0_transpose_y_0 = const()[name = string("attention_8_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_8_attention_0 = matmul(transpose_x = attention_8_attention_0_transpose_x_0, transpose_y = attention_8_attention_0_transpose_y_0, x = softmax_16, y = attention_8_slice_value_cache_heads_0)[name = string("attention_8_attention_0")]; + bool attention_8_scores_1_transpose_y_0 = const()[name = string("attention_8_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_8_scores_1_transpose_x_0 = const()[name = string("attention_8_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_8_scores_1 = matmul(transpose_x = attention_8_scores_1_transpose_x_0, transpose_y = attention_8_scores_1_transpose_y_0, x = attention_8_key_cache_head_1, y = attention_8_q_splits_1)[name = string("attention_8_scores_1")]; + fp16 attention_8_scaled_scores_1_y_0 = const()[name = string("attention_8_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_8_scaled_scores_1 = mul(x = attention_8_scores_1, y = attention_8_scaled_scores_1_y_0)[name = string("attention_8_scaled_scores_1")]; + tensor attention_8_masked_scaled_scores_1 = add(x = attention_8_scaled_scores_1, y = transpose_0)[name = string("attention_8_masked_scaled_scores_1")]; + int32 softmax_17_axis_0 = const()[name = string("softmax_17_axis_0"), val = int32(-2)]; + tensor softmax_17 = softmax(axis = softmax_17_axis_0, x = attention_8_masked_scaled_scores_1)[name = string("softmax_17")]; + bool attention_8_attention_1_transpose_x_0 = const()[name = string("attention_8_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_8_attention_1_transpose_y_0 = const()[name = string("attention_8_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_8_attention_1 = matmul(transpose_x = attention_8_attention_1_transpose_x_0, transpose_y = attention_8_attention_1_transpose_y_0, x = softmax_17, y = attention_8_slice_value_cache_heads_1)[name = string("attention_8_attention_1")]; + int32 attention_8_concat_attention_all_heads_axis_0 = const()[name = string("attention_8_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_8_concat_attention_all_heads_interleave_0 = const()[name = string("attention_8_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_8_concat_attention_all_heads = concat(axis = attention_8_concat_attention_all_heads_axis_0, interleave = attention_8_concat_attention_all_heads_interleave_0, values = (attention_8_attention_0, attention_8_attention_1))[name = string("attention_8_concat_attention_all_heads")]; + tensor attention_8_channels_first_retransposed_perm_0 = const()[name = string("attention_8_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_8_reshaped_shape_0 = const()[name = string("attention_8_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_8_channels_first_retransposed = transpose(perm = attention_8_channels_first_retransposed_perm_0, x = attention_8_concat_attention_all_heads)[name = string("transpose_31")]; + tensor attention_8_reshaped = reshape(shape = attention_8_reshaped_shape_0, x = attention_8_channels_first_retransposed)[name = string("attention_8_reshaped")]; + tensor attention_8_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384907776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385509952))))[name = string("attention_8_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_36 = constexpr_blockwise_shift_scale(data = attention_8_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385538688))))[name = string("constexpr_blockwise_shift_scale_36")]; + tensor attention_8_outproj_strides_0 = const()[name = string("attention_8_outproj_strides_0"), val = tensor([1])]; + string attention_8_outproj_pad_type_0 = const()[name = string("attention_8_outproj_pad_type_0"), val = string("valid")]; + tensor attention_8_outproj_pad_0 = const()[name = string("attention_8_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_8_outproj_dilations_0 = const()[name = string("attention_8_outproj_dilations_0"), val = tensor([1])]; + int32 attention_8_outproj_groups_0 = const()[name = string("attention_8_outproj_groups_0"), val = int32(1)]; + tensor attention_8_outproj = conv(dilations = attention_8_outproj_dilations_0, groups = attention_8_outproj_groups_0, pad = attention_8_outproj_pad_0, pad_type = attention_8_outproj_pad_type_0, strides = attention_8_outproj_strides_0, weight = constexpr_blockwise_shift_scale_36, x = attention_8_reshaped)[name = string("attention_8_outproj")]; + tensor block_8_residual_1 = add(x = block_7_residual_2, y = attention_8_outproj)[name = string("block_8_residual_1")]; + tensor block_8_ffn_rmsnorm_abs = abs(x = block_8_residual_1)[name = string("block_8_ffn_rmsnorm_abs")]; + tensor block_8_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_8_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_8_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_8_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_8_ffn_rmsnorm_maxval = reduce_max(axes = block_8_ffn_rmsnorm_maxval_axes_0, keep_dims = block_8_ffn_rmsnorm_maxval_keep_dims_0, x = block_8_ffn_rmsnorm_abs)[name = string("block_8_ffn_rmsnorm_maxval")]; + fp16 block_8_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_8_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_8_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_8_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_8_ffn_rmsnorm_maxval_clipped = clip(alpha = block_8_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_8_ffn_rmsnorm_maxval_clipped_beta_0, x = block_8_ffn_rmsnorm_maxval)[name = string("block_8_ffn_rmsnorm_maxval_clipped")]; + tensor block_8_ffn_rmsnorm_scaled = real_div(x = block_8_residual_1, y = block_8_ffn_rmsnorm_maxval_clipped)[name = string("block_8_ffn_rmsnorm_scaled")]; + tensor block_8_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_8_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_8_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_8_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_8_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_8_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_8_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_8_ffn_rmsnorm_scaled)[name = string("block_8_ffn_rmsnorm_squared_sum")]; + fp16 block_8_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_8_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_8_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_8_ffn_rmsnorm_rsqrt_epsilon_0, x = block_8_ffn_rmsnorm_squared_sum)[name = string("block_8_ffn_rmsnorm_rsqrt")]; + fp16 block_8_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_8_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_8_ffn_rmsnorm_dim_scaled = mul(x = block_8_ffn_rmsnorm_scaled, y = block_8_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_8_ffn_rmsnorm_dim_scaled")]; + tensor block_8_ffn_rmsnorm_normalized = mul(x = block_8_ffn_rmsnorm_dim_scaled, y = block_8_ffn_rmsnorm_rsqrt)[name = string("block_8_ffn_rmsnorm_normalized")]; + tensor block_8_ffn_rmsnorm_y_0 = const()[name = string("block_8_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385540544)))]; + tensor block_8_ffn_rmsnorm = mul(x = block_8_ffn_rmsnorm_normalized, y = block_8_ffn_rmsnorm_y_0)[name = string("block_8_ffn_rmsnorm")]; + tensor block_8_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385542400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388811072))))[name = string("block_8_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_37 = constexpr_blockwise_shift_scale(data = block_8_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388966784))))[name = string("constexpr_blockwise_shift_scale_37")]; + tensor block_8_ffn_inproj_strides_0 = const()[name = string("block_8_ffn_inproj_strides_0"), val = tensor([1])]; + string block_8_ffn_inproj_pad_type_0 = const()[name = string("block_8_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_8_ffn_inproj_pad_0 = const()[name = string("block_8_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_8_ffn_inproj_dilations_0 = const()[name = string("block_8_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_8_ffn_inproj_groups_0 = const()[name = string("block_8_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_8_ffn_inproj = conv(dilations = block_8_ffn_inproj_dilations_0, groups = block_8_ffn_inproj_groups_0, pad = block_8_ffn_inproj_pad_0, pad_type = block_8_ffn_inproj_pad_type_0, strides = block_8_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_37, x = block_8_ffn_rmsnorm)[name = string("block_8_ffn_inproj")]; + tensor block_8_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388976576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392245248))))[name = string("block_8_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_38 = constexpr_blockwise_shift_scale(data = block_8_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392400960))))[name = string("constexpr_blockwise_shift_scale_38")]; + tensor block_8_ffn_g_strides_0 = const()[name = string("block_8_ffn_g_strides_0"), val = tensor([1])]; + string block_8_ffn_g_pad_type_0 = const()[name = string("block_8_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_8_ffn_g_pad_0 = const()[name = string("block_8_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_8_ffn_g_dilations_0 = const()[name = string("block_8_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_8_ffn_g_groups_0 = const()[name = string("block_8_ffn_g_groups_0"), val = int32(1)]; + tensor block_8_ffn_g = conv(dilations = block_8_ffn_g_dilations_0, groups = block_8_ffn_g_groups_0, pad = block_8_ffn_g_pad_0, pad_type = block_8_ffn_g_pad_type_0, strides = block_8_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_38, x = block_8_ffn_rmsnorm)[name = string("block_8_ffn_g")]; + tensor block_8_ffn_g_activation = silu(x = block_8_ffn_g)[name = string("block_8_ffn_g_activation")]; + tensor block_8_ffn_x_gated = mul(x = block_8_ffn_inproj, y = block_8_ffn_g_activation)[name = string("block_8_ffn_x_gated")]; + tensor block_8_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392410752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395679424))))[name = string("block_8_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_39 = constexpr_blockwise_shift_scale(data = block_8_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395708160))))[name = string("constexpr_blockwise_shift_scale_39")]; + tensor block_8_ffn_outproj_strides_0 = const()[name = string("block_8_ffn_outproj_strides_0"), val = tensor([1])]; + string block_8_ffn_outproj_pad_type_0 = const()[name = string("block_8_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_8_ffn_outproj_pad_0 = const()[name = string("block_8_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_8_ffn_outproj_dilations_0 = const()[name = string("block_8_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_8_ffn_outproj_groups_0 = const()[name = string("block_8_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_8_ffn_outproj = conv(dilations = block_8_ffn_outproj_dilations_0, groups = block_8_ffn_outproj_groups_0, pad = block_8_ffn_outproj_pad_0, pad_type = block_8_ffn_outproj_pad_type_0, strides = block_8_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_39, x = block_8_ffn_x_gated)[name = string("block_8_ffn_outproj")]; + tensor block_8_residual_2 = add(x = block_8_ffn_outproj, y = block_8_residual_1)[name = string("block_8_residual_2")]; + tensor block_9_attention_rmsnorm_abs = abs(x = block_8_residual_2)[name = string("block_9_attention_rmsnorm_abs")]; + tensor block_9_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_9_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_9_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_9_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_9_attention_rmsnorm_maxval = reduce_max(axes = block_9_attention_rmsnorm_maxval_axes_0, keep_dims = block_9_attention_rmsnorm_maxval_keep_dims_0, x = block_9_attention_rmsnorm_abs)[name = string("block_9_attention_rmsnorm_maxval")]; + fp16 block_9_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_9_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_9_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_9_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_9_attention_rmsnorm_maxval_clipped = clip(alpha = block_9_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_9_attention_rmsnorm_maxval_clipped_beta_0, x = block_9_attention_rmsnorm_maxval)[name = string("block_9_attention_rmsnorm_maxval_clipped")]; + tensor block_9_attention_rmsnorm_scaled = real_div(x = block_8_residual_2, y = block_9_attention_rmsnorm_maxval_clipped)[name = string("block_9_attention_rmsnorm_scaled")]; + tensor block_9_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_9_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_9_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_9_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_9_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_9_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_9_attention_rmsnorm_squared_sum_keep_dims_0, x = block_9_attention_rmsnorm_scaled)[name = string("block_9_attention_rmsnorm_squared_sum")]; + fp16 block_9_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_9_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_9_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_9_attention_rmsnorm_rsqrt_epsilon_0, x = block_9_attention_rmsnorm_squared_sum)[name = string("block_9_attention_rmsnorm_rsqrt")]; + fp16 block_9_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_9_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_9_attention_rmsnorm_dim_scaled = mul(x = block_9_attention_rmsnorm_scaled, y = block_9_attention_rmsnorm_dim_scaled_y_0)[name = string("block_9_attention_rmsnorm_dim_scaled")]; + tensor block_9_attention_rmsnorm_normalized = mul(x = block_9_attention_rmsnorm_dim_scaled, y = block_9_attention_rmsnorm_rsqrt)[name = string("block_9_attention_rmsnorm_normalized")]; + tensor block_9_attention_rmsnorm_y_0 = const()[name = string("block_9_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395710016)))]; + tensor block_9_attention_rmsnorm = mul(x = block_9_attention_rmsnorm_normalized, y = block_9_attention_rmsnorm_y_0)[name = string("block_9_attention_rmsnorm")]; + tensor attention_9_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395711872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396486080))))[name = string("attention_9_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_40 = constexpr_blockwise_shift_scale(data = attention_9_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396523008))))[name = string("constexpr_blockwise_shift_scale_40")]; + tensor attention_9_qkvproj_bias_0 = const()[name = string("attention_9_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396525376)))]; + tensor attention_9_qkvproj_strides_0 = const()[name = string("attention_9_qkvproj_strides_0"), val = tensor([1])]; + string attention_9_qkvproj_pad_type_0 = const()[name = string("attention_9_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_9_qkvproj_pad_0 = const()[name = string("attention_9_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_9_qkvproj_dilations_0 = const()[name = string("attention_9_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_9_qkvproj_groups_0 = const()[name = string("attention_9_qkvproj_groups_0"), val = int32(1)]; + tensor attention_9_qkvproj = conv(bias = attention_9_qkvproj_bias_0, dilations = attention_9_qkvproj_dilations_0, groups = attention_9_qkvproj_groups_0, pad = attention_9_qkvproj_pad_0, pad_type = attention_9_qkvproj_pad_type_0, strides = attention_9_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_40, x = block_9_attention_rmsnorm)[name = string("attention_9_qkvproj")]; + tensor attention_9_head_reshape_shape_0 = const()[name = string("attention_9_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_9_head_reshape = reshape(shape = attention_9_head_reshape_shape_0, x = attention_9_qkvproj)[name = string("attention_9_head_reshape")]; + tensor attention_9_head_transpose_perm_0 = const()[name = string("attention_9_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_9_split_qkv_heads_axis_0 = const()[name = string("attention_9_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_9_split_qkv_heads_split_sizes_0 = const()[name = string("attention_9_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_9_head_transpose = transpose(perm = attention_9_head_transpose_perm_0, x = attention_9_head_reshape)[name = string("transpose_30")]; + tensor attention_9_split_qkv_heads_0, tensor attention_9_split_qkv_heads_1, tensor attention_9_split_qkv_heads_2 = split(axis = attention_9_split_qkv_heads_axis_0, split_sizes = attention_9_split_qkv_heads_split_sizes_0, x = attention_9_head_transpose)[name = string("attention_9_split_qkv_heads")]; + tensor attention_9_q_rope_lhs_mult = mul(x = attention_9_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_9_q_rope_lhs_mult")]; + int32 attention_9_q_rotate_half_split_num_splits_0 = const()[name = string("attention_9_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_9_q_rotate_half_split_axis_0 = const()[name = string("attention_9_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_9_q_rotate_half_split_0, tensor attention_9_q_rotate_half_split_1 = split(axis = attention_9_q_rotate_half_split_axis_0, num_splits = attention_9_q_rotate_half_split_num_splits_0, x = attention_9_split_qkv_heads_0)[name = string("attention_9_q_rotate_half_split")]; + fp16 attention_9_q_rotate_half_neg_y_0 = const()[name = string("attention_9_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_9_q_rotate_half_neg = mul(x = attention_9_q_rotate_half_split_1, y = attention_9_q_rotate_half_neg_y_0)[name = string("attention_9_q_rotate_half_neg")]; + int32 attention_9_q_rotate_half_concat_axis_0 = const()[name = string("attention_9_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_9_q_rotate_half_concat_interleave_0 = const()[name = string("attention_9_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_9_q_rotate_half_concat = concat(axis = attention_9_q_rotate_half_concat_axis_0, interleave = attention_9_q_rotate_half_concat_interleave_0, values = (attention_9_q_rotate_half_neg, attention_9_q_rotate_half_split_0))[name = string("attention_9_q_rotate_half_concat")]; + tensor attention_9_q_rope_rhs_mult = mul(x = attention_9_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_9_q_rope_rhs_mult")]; + tensor attention_9_q_rope = add(x = attention_9_q_rope_lhs_mult, y = attention_9_q_rope_rhs_mult)[name = string("attention_9_q_rope")]; + tensor attention_9_k_rope_lhs_mult = mul(x = attention_9_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_9_k_rope_lhs_mult")]; + int32 attention_9_k_rotate_half_split_num_splits_0 = const()[name = string("attention_9_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_9_k_rotate_half_split_axis_0 = const()[name = string("attention_9_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_9_k_rotate_half_split_0, tensor attention_9_k_rotate_half_split_1 = split(axis = attention_9_k_rotate_half_split_axis_0, num_splits = attention_9_k_rotate_half_split_num_splits_0, x = attention_9_split_qkv_heads_1)[name = string("attention_9_k_rotate_half_split")]; + fp16 attention_9_k_rotate_half_neg_y_0 = const()[name = string("attention_9_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_9_k_rotate_half_neg = mul(x = attention_9_k_rotate_half_split_1, y = attention_9_k_rotate_half_neg_y_0)[name = string("attention_9_k_rotate_half_neg")]; + int32 attention_9_k_rotate_half_concat_axis_0 = const()[name = string("attention_9_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_9_k_rotate_half_concat_interleave_0 = const()[name = string("attention_9_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_9_k_rotate_half_concat = concat(axis = attention_9_k_rotate_half_concat_axis_0, interleave = attention_9_k_rotate_half_concat_interleave_0, values = (attention_9_k_rotate_half_neg, attention_9_k_rotate_half_split_0))[name = string("attention_9_k_rotate_half_concat")]; + tensor attention_9_k_rope_rhs_mult = mul(x = attention_9_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_9_k_rope_rhs_mult")]; + tensor attention_9_k_rope = add(x = attention_9_k_rope_lhs_mult, y = attention_9_k_rope_rhs_mult)[name = string("attention_9_k_rope")]; + int32 attention_9_q_splits_axis_0 = const()[name = string("attention_9_q_splits_axis_0"), val = int32(1)]; + int32 attention_9_q_splits_num_splits_0 = const()[name = string("attention_9_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_9_q_splits_0, tensor attention_9_q_splits_1 = split(axis = attention_9_q_splits_axis_0, num_splits = attention_9_q_splits_num_splits_0, x = attention_9_q_rope)[name = string("attention_9_q_splits")]; + tensor attention_9_update_begin_0_values0_0 = const()[name = string("attention_9_update_begin_0_values0_0"), val = tensor([9])]; + tensor attention_9_update_begin_0_values1_0 = const()[name = string("attention_9_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_9_update_begin_0_values3_0 = const()[name = string("attention_9_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_9_update_begin_0_axis_0 = const()[name = string("attention_9_update_begin_0_axis_0"), val = int32(0)]; + bool attention_9_update_begin_0_interleave_0 = const()[name = string("attention_9_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_9_update_begin_0 = concat(axis = attention_9_update_begin_0_axis_0, interleave = attention_9_update_begin_0_interleave_0, values = (attention_9_update_begin_0_values0_0, attention_9_update_begin_0_values1_0, query_pos1, attention_9_update_begin_0_values3_0))[name = string("attention_9_update_begin_0")]; + tensor attention_9_update_end_0_values0_0 = const()[name = string("attention_9_update_end_0_values0_0"), val = tensor([10])]; + tensor attention_9_update_end_0_values1_0 = const()[name = string("attention_9_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_9_update_end_0_values3_0 = const()[name = string("attention_9_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_9_update_end_0_axis_0 = const()[name = string("attention_9_update_end_0_axis_0"), val = int32(0)]; + bool attention_9_update_end_0_interleave_0 = const()[name = string("attention_9_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_9_update_end_0 = concat(axis = attention_9_update_end_0_axis_0, interleave = attention_9_update_end_0_interleave_0, values = (attention_9_update_end_0_values0_0, attention_9_update_end_0_values1_0, end_pos_0, attention_9_update_end_0_values3_0))[name = string("attention_9_update_end_0")]; + tensor attention_9_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_9_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_9_updated_key_cache_0 = slice_update(begin = attention_9_update_begin_0, end = attention_9_update_end_0, squeeze_mask = attention_9_updated_key_cache_0_squeeze_mask_0, update = attention_9_k_rope, x = coreml_update_state_16)[name = string("attention_9_updated_key_cache_0")]; + write_state(data = attention_9_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_18_write_state")]; + tensor coreml_update_state_18 = read_state(input = key_cache_state)[name = string("coreml_update_state_18")]; + tensor attention_9_key_cache_begin_0 = const()[name = string("attention_9_key_cache_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor attention_9_key_cache_end_0 = const()[name = string("attention_9_key_cache_end_0"), val = tensor([10, 2, 512, 64])]; + tensor attention_9_key_cache_squeeze_mask_0 = const()[name = string("attention_9_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_9_key_cache = slice_by_index(begin = attention_9_key_cache_begin_0, end = attention_9_key_cache_end_0, squeeze_mask = attention_9_key_cache_squeeze_mask_0, x = coreml_update_state_18)[name = string("attention_9_key_cache")]; + int32 attention_9_key_cache_head_axis_0 = const()[name = string("attention_9_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_9_key_cache_head_num_splits_0 = const()[name = string("attention_9_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_9_key_cache_head_0, tensor attention_9_key_cache_head_1 = split(axis = attention_9_key_cache_head_axis_0, num_splits = attention_9_key_cache_head_num_splits_0, x = attention_9_key_cache)[name = string("attention_9_key_cache_head")]; + tensor attention_9_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_9_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_9_updated_value_cache_0 = slice_update(begin = attention_9_update_begin_0, end = attention_9_update_end_0, squeeze_mask = attention_9_updated_value_cache_0_squeeze_mask_0, update = attention_9_split_qkv_heads_2, x = coreml_update_state_17)[name = string("attention_9_updated_value_cache_0")]; + write_state(data = attention_9_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_19_write_state")]; + tensor coreml_update_state_19 = read_state(input = value_cache_state)[name = string("coreml_update_state_19")]; + tensor attention_9_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_9_slice_current_layer_value_cache_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor attention_9_slice_current_layer_value_cache_end_0 = const()[name = string("attention_9_slice_current_layer_value_cache_end_0"), val = tensor([10, 2, 512, 64])]; + tensor attention_9_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_9_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_9_slice_current_layer_value_cache = slice_by_index(begin = attention_9_slice_current_layer_value_cache_begin_0, end = attention_9_slice_current_layer_value_cache_end_0, squeeze_mask = attention_9_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_19)[name = string("attention_9_slice_current_layer_value_cache")]; + int32 attention_9_slice_value_cache_heads_axis_0 = const()[name = string("attention_9_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_9_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_9_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_9_slice_value_cache_heads_0, tensor attention_9_slice_value_cache_heads_1 = split(axis = attention_9_slice_value_cache_heads_axis_0, num_splits = attention_9_slice_value_cache_heads_num_splits_0, x = attention_9_slice_current_layer_value_cache)[name = string("attention_9_slice_value_cache_heads")]; + bool attention_9_scores_0_transpose_y_0 = const()[name = string("attention_9_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_9_scores_0_transpose_x_0 = const()[name = string("attention_9_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_9_scores_0 = matmul(transpose_x = attention_9_scores_0_transpose_x_0, transpose_y = attention_9_scores_0_transpose_y_0, x = attention_9_key_cache_head_0, y = attention_9_q_splits_0)[name = string("attention_9_scores_0")]; + fp16 attention_9_scaled_scores_0_y_0 = const()[name = string("attention_9_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_9_scaled_scores_0 = mul(x = attention_9_scores_0, y = attention_9_scaled_scores_0_y_0)[name = string("attention_9_scaled_scores_0")]; + tensor attention_9_masked_scaled_scores_0 = add(x = attention_9_scaled_scores_0, y = transpose_0)[name = string("attention_9_masked_scaled_scores_0")]; + int32 softmax_18_axis_0 = const()[name = string("softmax_18_axis_0"), val = int32(-2)]; + tensor softmax_18 = softmax(axis = softmax_18_axis_0, x = attention_9_masked_scaled_scores_0)[name = string("softmax_18")]; + bool attention_9_attention_0_transpose_x_0 = const()[name = string("attention_9_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_9_attention_0_transpose_y_0 = const()[name = string("attention_9_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_9_attention_0 = matmul(transpose_x = attention_9_attention_0_transpose_x_0, transpose_y = attention_9_attention_0_transpose_y_0, x = softmax_18, y = attention_9_slice_value_cache_heads_0)[name = string("attention_9_attention_0")]; + bool attention_9_scores_1_transpose_y_0 = const()[name = string("attention_9_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_9_scores_1_transpose_x_0 = const()[name = string("attention_9_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_9_scores_1 = matmul(transpose_x = attention_9_scores_1_transpose_x_0, transpose_y = attention_9_scores_1_transpose_y_0, x = attention_9_key_cache_head_1, y = attention_9_q_splits_1)[name = string("attention_9_scores_1")]; + fp16 attention_9_scaled_scores_1_y_0 = const()[name = string("attention_9_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_9_scaled_scores_1 = mul(x = attention_9_scores_1, y = attention_9_scaled_scores_1_y_0)[name = string("attention_9_scaled_scores_1")]; + tensor attention_9_masked_scaled_scores_1 = add(x = attention_9_scaled_scores_1, y = transpose_0)[name = string("attention_9_masked_scaled_scores_1")]; + int32 softmax_19_axis_0 = const()[name = string("softmax_19_axis_0"), val = int32(-2)]; + tensor softmax_19 = softmax(axis = softmax_19_axis_0, x = attention_9_masked_scaled_scores_1)[name = string("softmax_19")]; + bool attention_9_attention_1_transpose_x_0 = const()[name = string("attention_9_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_9_attention_1_transpose_y_0 = const()[name = string("attention_9_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_9_attention_1 = matmul(transpose_x = attention_9_attention_1_transpose_x_0, transpose_y = attention_9_attention_1_transpose_y_0, x = softmax_19, y = attention_9_slice_value_cache_heads_1)[name = string("attention_9_attention_1")]; + int32 attention_9_concat_attention_all_heads_axis_0 = const()[name = string("attention_9_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_9_concat_attention_all_heads_interleave_0 = const()[name = string("attention_9_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_9_concat_attention_all_heads = concat(axis = attention_9_concat_attention_all_heads_axis_0, interleave = attention_9_concat_attention_all_heads_interleave_0, values = (attention_9_attention_0, attention_9_attention_1))[name = string("attention_9_concat_attention_all_heads")]; + tensor attention_9_channels_first_retransposed_perm_0 = const()[name = string("attention_9_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_9_reshaped_shape_0 = const()[name = string("attention_9_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_9_channels_first_retransposed = transpose(perm = attention_9_channels_first_retransposed_perm_0, x = attention_9_concat_attention_all_heads)[name = string("transpose_29")]; + tensor attention_9_reshaped = reshape(shape = attention_9_reshaped_shape_0, x = attention_9_channels_first_retransposed)[name = string("attention_9_reshaped")]; + tensor attention_9_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396527744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397129920))))[name = string("attention_9_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_41 = constexpr_blockwise_shift_scale(data = attention_9_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397158656))))[name = string("constexpr_blockwise_shift_scale_41")]; + tensor attention_9_outproj_strides_0 = const()[name = string("attention_9_outproj_strides_0"), val = tensor([1])]; + string attention_9_outproj_pad_type_0 = const()[name = string("attention_9_outproj_pad_type_0"), val = string("valid")]; + tensor attention_9_outproj_pad_0 = const()[name = string("attention_9_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_9_outproj_dilations_0 = const()[name = string("attention_9_outproj_dilations_0"), val = tensor([1])]; + int32 attention_9_outproj_groups_0 = const()[name = string("attention_9_outproj_groups_0"), val = int32(1)]; + tensor attention_9_outproj = conv(dilations = attention_9_outproj_dilations_0, groups = attention_9_outproj_groups_0, pad = attention_9_outproj_pad_0, pad_type = attention_9_outproj_pad_type_0, strides = attention_9_outproj_strides_0, weight = constexpr_blockwise_shift_scale_41, x = attention_9_reshaped)[name = string("attention_9_outproj")]; + tensor block_9_residual_1 = add(x = block_8_residual_2, y = attention_9_outproj)[name = string("block_9_residual_1")]; + tensor block_9_ffn_rmsnorm_abs = abs(x = block_9_residual_1)[name = string("block_9_ffn_rmsnorm_abs")]; + tensor block_9_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_9_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_9_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_9_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_9_ffn_rmsnorm_maxval = reduce_max(axes = block_9_ffn_rmsnorm_maxval_axes_0, keep_dims = block_9_ffn_rmsnorm_maxval_keep_dims_0, x = block_9_ffn_rmsnorm_abs)[name = string("block_9_ffn_rmsnorm_maxval")]; + fp16 block_9_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_9_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_9_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_9_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_9_ffn_rmsnorm_maxval_clipped = clip(alpha = block_9_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_9_ffn_rmsnorm_maxval_clipped_beta_0, x = block_9_ffn_rmsnorm_maxval)[name = string("block_9_ffn_rmsnorm_maxval_clipped")]; + tensor block_9_ffn_rmsnorm_scaled = real_div(x = block_9_residual_1, y = block_9_ffn_rmsnorm_maxval_clipped)[name = string("block_9_ffn_rmsnorm_scaled")]; + tensor block_9_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_9_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_9_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_9_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_9_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_9_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_9_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_9_ffn_rmsnorm_scaled)[name = string("block_9_ffn_rmsnorm_squared_sum")]; + fp16 block_9_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_9_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_9_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_9_ffn_rmsnorm_rsqrt_epsilon_0, x = block_9_ffn_rmsnorm_squared_sum)[name = string("block_9_ffn_rmsnorm_rsqrt")]; + fp16 block_9_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_9_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_9_ffn_rmsnorm_dim_scaled = mul(x = block_9_ffn_rmsnorm_scaled, y = block_9_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_9_ffn_rmsnorm_dim_scaled")]; + tensor block_9_ffn_rmsnorm_normalized = mul(x = block_9_ffn_rmsnorm_dim_scaled, y = block_9_ffn_rmsnorm_rsqrt)[name = string("block_9_ffn_rmsnorm_normalized")]; + tensor block_9_ffn_rmsnorm_y_0 = const()[name = string("block_9_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397160512)))]; + tensor block_9_ffn_rmsnorm = mul(x = block_9_ffn_rmsnorm_normalized, y = block_9_ffn_rmsnorm_y_0)[name = string("block_9_ffn_rmsnorm")]; + tensor block_9_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397162368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400431040))))[name = string("block_9_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_42 = constexpr_blockwise_shift_scale(data = block_9_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400586752))))[name = string("constexpr_blockwise_shift_scale_42")]; + tensor block_9_ffn_inproj_strides_0 = const()[name = string("block_9_ffn_inproj_strides_0"), val = tensor([1])]; + string block_9_ffn_inproj_pad_type_0 = const()[name = string("block_9_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_9_ffn_inproj_pad_0 = const()[name = string("block_9_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_9_ffn_inproj_dilations_0 = const()[name = string("block_9_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_9_ffn_inproj_groups_0 = const()[name = string("block_9_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_9_ffn_inproj = conv(dilations = block_9_ffn_inproj_dilations_0, groups = block_9_ffn_inproj_groups_0, pad = block_9_ffn_inproj_pad_0, pad_type = block_9_ffn_inproj_pad_type_0, strides = block_9_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_42, x = block_9_ffn_rmsnorm)[name = string("block_9_ffn_inproj")]; + tensor block_9_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400596544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403865216))))[name = string("block_9_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_43 = constexpr_blockwise_shift_scale(data = block_9_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404020928))))[name = string("constexpr_blockwise_shift_scale_43")]; + tensor block_9_ffn_g_strides_0 = const()[name = string("block_9_ffn_g_strides_0"), val = tensor([1])]; + string block_9_ffn_g_pad_type_0 = const()[name = string("block_9_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_9_ffn_g_pad_0 = const()[name = string("block_9_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_9_ffn_g_dilations_0 = const()[name = string("block_9_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_9_ffn_g_groups_0 = const()[name = string("block_9_ffn_g_groups_0"), val = int32(1)]; + tensor block_9_ffn_g = conv(dilations = block_9_ffn_g_dilations_0, groups = block_9_ffn_g_groups_0, pad = block_9_ffn_g_pad_0, pad_type = block_9_ffn_g_pad_type_0, strides = block_9_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_43, x = block_9_ffn_rmsnorm)[name = string("block_9_ffn_g")]; + tensor block_9_ffn_g_activation = silu(x = block_9_ffn_g)[name = string("block_9_ffn_g_activation")]; + tensor block_9_ffn_x_gated = mul(x = block_9_ffn_inproj, y = block_9_ffn_g_activation)[name = string("block_9_ffn_x_gated")]; + tensor block_9_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404030720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407299392))))[name = string("block_9_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_44 = constexpr_blockwise_shift_scale(data = block_9_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407328128))))[name = string("constexpr_blockwise_shift_scale_44")]; + tensor block_9_ffn_outproj_strides_0 = const()[name = string("block_9_ffn_outproj_strides_0"), val = tensor([1])]; + string block_9_ffn_outproj_pad_type_0 = const()[name = string("block_9_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_9_ffn_outproj_pad_0 = const()[name = string("block_9_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_9_ffn_outproj_dilations_0 = const()[name = string("block_9_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_9_ffn_outproj_groups_0 = const()[name = string("block_9_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_9_ffn_outproj = conv(dilations = block_9_ffn_outproj_dilations_0, groups = block_9_ffn_outproj_groups_0, pad = block_9_ffn_outproj_pad_0, pad_type = block_9_ffn_outproj_pad_type_0, strides = block_9_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_44, x = block_9_ffn_x_gated)[name = string("block_9_ffn_outproj")]; + tensor block_9_residual_2 = add(x = block_9_ffn_outproj, y = block_9_residual_1)[name = string("block_9_residual_2")]; + tensor block_10_attention_rmsnorm_abs = abs(x = block_9_residual_2)[name = string("block_10_attention_rmsnorm_abs")]; + tensor block_10_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_10_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_10_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_10_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_10_attention_rmsnorm_maxval = reduce_max(axes = block_10_attention_rmsnorm_maxval_axes_0, keep_dims = block_10_attention_rmsnorm_maxval_keep_dims_0, x = block_10_attention_rmsnorm_abs)[name = string("block_10_attention_rmsnorm_maxval")]; + fp16 block_10_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_10_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_10_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_10_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_10_attention_rmsnorm_maxval_clipped = clip(alpha = block_10_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_10_attention_rmsnorm_maxval_clipped_beta_0, x = block_10_attention_rmsnorm_maxval)[name = string("block_10_attention_rmsnorm_maxval_clipped")]; + tensor block_10_attention_rmsnorm_scaled = real_div(x = block_9_residual_2, y = block_10_attention_rmsnorm_maxval_clipped)[name = string("block_10_attention_rmsnorm_scaled")]; + tensor block_10_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_10_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_10_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_10_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_10_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_10_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_10_attention_rmsnorm_squared_sum_keep_dims_0, x = block_10_attention_rmsnorm_scaled)[name = string("block_10_attention_rmsnorm_squared_sum")]; + fp16 block_10_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_10_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_10_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_10_attention_rmsnorm_rsqrt_epsilon_0, x = block_10_attention_rmsnorm_squared_sum)[name = string("block_10_attention_rmsnorm_rsqrt")]; + fp16 block_10_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_10_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_10_attention_rmsnorm_dim_scaled = mul(x = block_10_attention_rmsnorm_scaled, y = block_10_attention_rmsnorm_dim_scaled_y_0)[name = string("block_10_attention_rmsnorm_dim_scaled")]; + tensor block_10_attention_rmsnorm_normalized = mul(x = block_10_attention_rmsnorm_dim_scaled, y = block_10_attention_rmsnorm_rsqrt)[name = string("block_10_attention_rmsnorm_normalized")]; + tensor block_10_attention_rmsnorm_y_0 = const()[name = string("block_10_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407329984)))]; + tensor block_10_attention_rmsnorm = mul(x = block_10_attention_rmsnorm_normalized, y = block_10_attention_rmsnorm_y_0)[name = string("block_10_attention_rmsnorm")]; + tensor attention_10_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407331840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408106048))))[name = string("attention_10_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_45 = constexpr_blockwise_shift_scale(data = attention_10_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408142976))))[name = string("constexpr_blockwise_shift_scale_45")]; + tensor attention_10_qkvproj_bias_0 = const()[name = string("attention_10_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408145344)))]; + tensor attention_10_qkvproj_strides_0 = const()[name = string("attention_10_qkvproj_strides_0"), val = tensor([1])]; + string attention_10_qkvproj_pad_type_0 = const()[name = string("attention_10_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_10_qkvproj_pad_0 = const()[name = string("attention_10_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_10_qkvproj_dilations_0 = const()[name = string("attention_10_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_10_qkvproj_groups_0 = const()[name = string("attention_10_qkvproj_groups_0"), val = int32(1)]; + tensor attention_10_qkvproj = conv(bias = attention_10_qkvproj_bias_0, dilations = attention_10_qkvproj_dilations_0, groups = attention_10_qkvproj_groups_0, pad = attention_10_qkvproj_pad_0, pad_type = attention_10_qkvproj_pad_type_0, strides = attention_10_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_45, x = block_10_attention_rmsnorm)[name = string("attention_10_qkvproj")]; + tensor attention_10_head_reshape_shape_0 = const()[name = string("attention_10_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_10_head_reshape = reshape(shape = attention_10_head_reshape_shape_0, x = attention_10_qkvproj)[name = string("attention_10_head_reshape")]; + tensor attention_10_head_transpose_perm_0 = const()[name = string("attention_10_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_10_split_qkv_heads_axis_0 = const()[name = string("attention_10_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_10_split_qkv_heads_split_sizes_0 = const()[name = string("attention_10_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_10_head_transpose = transpose(perm = attention_10_head_transpose_perm_0, x = attention_10_head_reshape)[name = string("transpose_28")]; + tensor attention_10_split_qkv_heads_0, tensor attention_10_split_qkv_heads_1, tensor attention_10_split_qkv_heads_2 = split(axis = attention_10_split_qkv_heads_axis_0, split_sizes = attention_10_split_qkv_heads_split_sizes_0, x = attention_10_head_transpose)[name = string("attention_10_split_qkv_heads")]; + tensor attention_10_q_rope_lhs_mult = mul(x = attention_10_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_10_q_rope_lhs_mult")]; + int32 attention_10_q_rotate_half_split_num_splits_0 = const()[name = string("attention_10_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_10_q_rotate_half_split_axis_0 = const()[name = string("attention_10_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_10_q_rotate_half_split_0, tensor attention_10_q_rotate_half_split_1 = split(axis = attention_10_q_rotate_half_split_axis_0, num_splits = attention_10_q_rotate_half_split_num_splits_0, x = attention_10_split_qkv_heads_0)[name = string("attention_10_q_rotate_half_split")]; + fp16 attention_10_q_rotate_half_neg_y_0 = const()[name = string("attention_10_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_10_q_rotate_half_neg = mul(x = attention_10_q_rotate_half_split_1, y = attention_10_q_rotate_half_neg_y_0)[name = string("attention_10_q_rotate_half_neg")]; + int32 attention_10_q_rotate_half_concat_axis_0 = const()[name = string("attention_10_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_10_q_rotate_half_concat_interleave_0 = const()[name = string("attention_10_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_10_q_rotate_half_concat = concat(axis = attention_10_q_rotate_half_concat_axis_0, interleave = attention_10_q_rotate_half_concat_interleave_0, values = (attention_10_q_rotate_half_neg, attention_10_q_rotate_half_split_0))[name = string("attention_10_q_rotate_half_concat")]; + tensor attention_10_q_rope_rhs_mult = mul(x = attention_10_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_10_q_rope_rhs_mult")]; + tensor attention_10_q_rope = add(x = attention_10_q_rope_lhs_mult, y = attention_10_q_rope_rhs_mult)[name = string("attention_10_q_rope")]; + tensor attention_10_k_rope_lhs_mult = mul(x = attention_10_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_10_k_rope_lhs_mult")]; + int32 attention_10_k_rotate_half_split_num_splits_0 = const()[name = string("attention_10_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_10_k_rotate_half_split_axis_0 = const()[name = string("attention_10_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_10_k_rotate_half_split_0, tensor attention_10_k_rotate_half_split_1 = split(axis = attention_10_k_rotate_half_split_axis_0, num_splits = attention_10_k_rotate_half_split_num_splits_0, x = attention_10_split_qkv_heads_1)[name = string("attention_10_k_rotate_half_split")]; + fp16 attention_10_k_rotate_half_neg_y_0 = const()[name = string("attention_10_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_10_k_rotate_half_neg = mul(x = attention_10_k_rotate_half_split_1, y = attention_10_k_rotate_half_neg_y_0)[name = string("attention_10_k_rotate_half_neg")]; + int32 attention_10_k_rotate_half_concat_axis_0 = const()[name = string("attention_10_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_10_k_rotate_half_concat_interleave_0 = const()[name = string("attention_10_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_10_k_rotate_half_concat = concat(axis = attention_10_k_rotate_half_concat_axis_0, interleave = attention_10_k_rotate_half_concat_interleave_0, values = (attention_10_k_rotate_half_neg, attention_10_k_rotate_half_split_0))[name = string("attention_10_k_rotate_half_concat")]; + tensor attention_10_k_rope_rhs_mult = mul(x = attention_10_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_10_k_rope_rhs_mult")]; + tensor attention_10_k_rope = add(x = attention_10_k_rope_lhs_mult, y = attention_10_k_rope_rhs_mult)[name = string("attention_10_k_rope")]; + int32 attention_10_q_splits_axis_0 = const()[name = string("attention_10_q_splits_axis_0"), val = int32(1)]; + int32 attention_10_q_splits_num_splits_0 = const()[name = string("attention_10_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_10_q_splits_0, tensor attention_10_q_splits_1 = split(axis = attention_10_q_splits_axis_0, num_splits = attention_10_q_splits_num_splits_0, x = attention_10_q_rope)[name = string("attention_10_q_splits")]; + tensor attention_10_update_begin_0_values0_0 = const()[name = string("attention_10_update_begin_0_values0_0"), val = tensor([10])]; + tensor attention_10_update_begin_0_values1_0 = const()[name = string("attention_10_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_10_update_begin_0_values3_0 = const()[name = string("attention_10_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_10_update_begin_0_axis_0 = const()[name = string("attention_10_update_begin_0_axis_0"), val = int32(0)]; + bool attention_10_update_begin_0_interleave_0 = const()[name = string("attention_10_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_10_update_begin_0 = concat(axis = attention_10_update_begin_0_axis_0, interleave = attention_10_update_begin_0_interleave_0, values = (attention_10_update_begin_0_values0_0, attention_10_update_begin_0_values1_0, query_pos1, attention_10_update_begin_0_values3_0))[name = string("attention_10_update_begin_0")]; + tensor attention_10_update_end_0_values0_0 = const()[name = string("attention_10_update_end_0_values0_0"), val = tensor([11])]; + tensor attention_10_update_end_0_values1_0 = const()[name = string("attention_10_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_10_update_end_0_values3_0 = const()[name = string("attention_10_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_10_update_end_0_axis_0 = const()[name = string("attention_10_update_end_0_axis_0"), val = int32(0)]; + bool attention_10_update_end_0_interleave_0 = const()[name = string("attention_10_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_10_update_end_0 = concat(axis = attention_10_update_end_0_axis_0, interleave = attention_10_update_end_0_interleave_0, values = (attention_10_update_end_0_values0_0, attention_10_update_end_0_values1_0, end_pos_0, attention_10_update_end_0_values3_0))[name = string("attention_10_update_end_0")]; + tensor attention_10_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_10_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_10_updated_key_cache_0 = slice_update(begin = attention_10_update_begin_0, end = attention_10_update_end_0, squeeze_mask = attention_10_updated_key_cache_0_squeeze_mask_0, update = attention_10_k_rope, x = coreml_update_state_18)[name = string("attention_10_updated_key_cache_0")]; + write_state(data = attention_10_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_20_write_state")]; + tensor coreml_update_state_20 = read_state(input = key_cache_state)[name = string("coreml_update_state_20")]; + tensor attention_10_key_cache_begin_0 = const()[name = string("attention_10_key_cache_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor attention_10_key_cache_end_0 = const()[name = string("attention_10_key_cache_end_0"), val = tensor([11, 2, 512, 64])]; + tensor attention_10_key_cache_squeeze_mask_0 = const()[name = string("attention_10_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_10_key_cache = slice_by_index(begin = attention_10_key_cache_begin_0, end = attention_10_key_cache_end_0, squeeze_mask = attention_10_key_cache_squeeze_mask_0, x = coreml_update_state_20)[name = string("attention_10_key_cache")]; + int32 attention_10_key_cache_head_axis_0 = const()[name = string("attention_10_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_10_key_cache_head_num_splits_0 = const()[name = string("attention_10_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_10_key_cache_head_0, tensor attention_10_key_cache_head_1 = split(axis = attention_10_key_cache_head_axis_0, num_splits = attention_10_key_cache_head_num_splits_0, x = attention_10_key_cache)[name = string("attention_10_key_cache_head")]; + tensor attention_10_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_10_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_10_updated_value_cache_0 = slice_update(begin = attention_10_update_begin_0, end = attention_10_update_end_0, squeeze_mask = attention_10_updated_value_cache_0_squeeze_mask_0, update = attention_10_split_qkv_heads_2, x = coreml_update_state_19)[name = string("attention_10_updated_value_cache_0")]; + write_state(data = attention_10_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_21_write_state")]; + tensor coreml_update_state_21 = read_state(input = value_cache_state)[name = string("coreml_update_state_21")]; + tensor attention_10_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_10_slice_current_layer_value_cache_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor attention_10_slice_current_layer_value_cache_end_0 = const()[name = string("attention_10_slice_current_layer_value_cache_end_0"), val = tensor([11, 2, 512, 64])]; + tensor attention_10_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_10_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_10_slice_current_layer_value_cache = slice_by_index(begin = attention_10_slice_current_layer_value_cache_begin_0, end = attention_10_slice_current_layer_value_cache_end_0, squeeze_mask = attention_10_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_21)[name = string("attention_10_slice_current_layer_value_cache")]; + int32 attention_10_slice_value_cache_heads_axis_0 = const()[name = string("attention_10_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_10_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_10_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_10_slice_value_cache_heads_0, tensor attention_10_slice_value_cache_heads_1 = split(axis = attention_10_slice_value_cache_heads_axis_0, num_splits = attention_10_slice_value_cache_heads_num_splits_0, x = attention_10_slice_current_layer_value_cache)[name = string("attention_10_slice_value_cache_heads")]; + bool attention_10_scores_0_transpose_y_0 = const()[name = string("attention_10_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_10_scores_0_transpose_x_0 = const()[name = string("attention_10_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_10_scores_0 = matmul(transpose_x = attention_10_scores_0_transpose_x_0, transpose_y = attention_10_scores_0_transpose_y_0, x = attention_10_key_cache_head_0, y = attention_10_q_splits_0)[name = string("attention_10_scores_0")]; + fp16 attention_10_scaled_scores_0_y_0 = const()[name = string("attention_10_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_10_scaled_scores_0 = mul(x = attention_10_scores_0, y = attention_10_scaled_scores_0_y_0)[name = string("attention_10_scaled_scores_0")]; + tensor attention_10_masked_scaled_scores_0 = add(x = attention_10_scaled_scores_0, y = transpose_0)[name = string("attention_10_masked_scaled_scores_0")]; + int32 softmax_20_axis_0 = const()[name = string("softmax_20_axis_0"), val = int32(-2)]; + tensor softmax_20 = softmax(axis = softmax_20_axis_0, x = attention_10_masked_scaled_scores_0)[name = string("softmax_20")]; + bool attention_10_attention_0_transpose_x_0 = const()[name = string("attention_10_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_10_attention_0_transpose_y_0 = const()[name = string("attention_10_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_10_attention_0 = matmul(transpose_x = attention_10_attention_0_transpose_x_0, transpose_y = attention_10_attention_0_transpose_y_0, x = softmax_20, y = attention_10_slice_value_cache_heads_0)[name = string("attention_10_attention_0")]; + bool attention_10_scores_1_transpose_y_0 = const()[name = string("attention_10_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_10_scores_1_transpose_x_0 = const()[name = string("attention_10_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_10_scores_1 = matmul(transpose_x = attention_10_scores_1_transpose_x_0, transpose_y = attention_10_scores_1_transpose_y_0, x = attention_10_key_cache_head_1, y = attention_10_q_splits_1)[name = string("attention_10_scores_1")]; + fp16 attention_10_scaled_scores_1_y_0 = const()[name = string("attention_10_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_10_scaled_scores_1 = mul(x = attention_10_scores_1, y = attention_10_scaled_scores_1_y_0)[name = string("attention_10_scaled_scores_1")]; + tensor attention_10_masked_scaled_scores_1 = add(x = attention_10_scaled_scores_1, y = transpose_0)[name = string("attention_10_masked_scaled_scores_1")]; + int32 softmax_21_axis_0 = const()[name = string("softmax_21_axis_0"), val = int32(-2)]; + tensor softmax_21 = softmax(axis = softmax_21_axis_0, x = attention_10_masked_scaled_scores_1)[name = string("softmax_21")]; + bool attention_10_attention_1_transpose_x_0 = const()[name = string("attention_10_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_10_attention_1_transpose_y_0 = const()[name = string("attention_10_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_10_attention_1 = matmul(transpose_x = attention_10_attention_1_transpose_x_0, transpose_y = attention_10_attention_1_transpose_y_0, x = softmax_21, y = attention_10_slice_value_cache_heads_1)[name = string("attention_10_attention_1")]; + int32 attention_10_concat_attention_all_heads_axis_0 = const()[name = string("attention_10_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_10_concat_attention_all_heads_interleave_0 = const()[name = string("attention_10_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_10_concat_attention_all_heads = concat(axis = attention_10_concat_attention_all_heads_axis_0, interleave = attention_10_concat_attention_all_heads_interleave_0, values = (attention_10_attention_0, attention_10_attention_1))[name = string("attention_10_concat_attention_all_heads")]; + tensor attention_10_channels_first_retransposed_perm_0 = const()[name = string("attention_10_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_10_reshaped_shape_0 = const()[name = string("attention_10_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_10_channels_first_retransposed = transpose(perm = attention_10_channels_first_retransposed_perm_0, x = attention_10_concat_attention_all_heads)[name = string("transpose_27")]; + tensor attention_10_reshaped = reshape(shape = attention_10_reshaped_shape_0, x = attention_10_channels_first_retransposed)[name = string("attention_10_reshaped")]; + tensor attention_10_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408147712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408749888))))[name = string("attention_10_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_46 = constexpr_blockwise_shift_scale(data = attention_10_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408778624))))[name = string("constexpr_blockwise_shift_scale_46")]; + tensor attention_10_outproj_strides_0 = const()[name = string("attention_10_outproj_strides_0"), val = tensor([1])]; + string attention_10_outproj_pad_type_0 = const()[name = string("attention_10_outproj_pad_type_0"), val = string("valid")]; + tensor attention_10_outproj_pad_0 = const()[name = string("attention_10_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_10_outproj_dilations_0 = const()[name = string("attention_10_outproj_dilations_0"), val = tensor([1])]; + int32 attention_10_outproj_groups_0 = const()[name = string("attention_10_outproj_groups_0"), val = int32(1)]; + tensor attention_10_outproj = conv(dilations = attention_10_outproj_dilations_0, groups = attention_10_outproj_groups_0, pad = attention_10_outproj_pad_0, pad_type = attention_10_outproj_pad_type_0, strides = attention_10_outproj_strides_0, weight = constexpr_blockwise_shift_scale_46, x = attention_10_reshaped)[name = string("attention_10_outproj")]; + tensor block_10_residual_1 = add(x = block_9_residual_2, y = attention_10_outproj)[name = string("block_10_residual_1")]; + tensor block_10_ffn_rmsnorm_abs = abs(x = block_10_residual_1)[name = string("block_10_ffn_rmsnorm_abs")]; + tensor block_10_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_10_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_10_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_10_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_10_ffn_rmsnorm_maxval = reduce_max(axes = block_10_ffn_rmsnorm_maxval_axes_0, keep_dims = block_10_ffn_rmsnorm_maxval_keep_dims_0, x = block_10_ffn_rmsnorm_abs)[name = string("block_10_ffn_rmsnorm_maxval")]; + fp16 block_10_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_10_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_10_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_10_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_10_ffn_rmsnorm_maxval_clipped = clip(alpha = block_10_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_10_ffn_rmsnorm_maxval_clipped_beta_0, x = block_10_ffn_rmsnorm_maxval)[name = string("block_10_ffn_rmsnorm_maxval_clipped")]; + tensor block_10_ffn_rmsnorm_scaled = real_div(x = block_10_residual_1, y = block_10_ffn_rmsnorm_maxval_clipped)[name = string("block_10_ffn_rmsnorm_scaled")]; + tensor block_10_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_10_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_10_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_10_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_10_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_10_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_10_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_10_ffn_rmsnorm_scaled)[name = string("block_10_ffn_rmsnorm_squared_sum")]; + fp16 block_10_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_10_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_10_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_10_ffn_rmsnorm_rsqrt_epsilon_0, x = block_10_ffn_rmsnorm_squared_sum)[name = string("block_10_ffn_rmsnorm_rsqrt")]; + fp16 block_10_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_10_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_10_ffn_rmsnorm_dim_scaled = mul(x = block_10_ffn_rmsnorm_scaled, y = block_10_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_10_ffn_rmsnorm_dim_scaled")]; + tensor block_10_ffn_rmsnorm_normalized = mul(x = block_10_ffn_rmsnorm_dim_scaled, y = block_10_ffn_rmsnorm_rsqrt)[name = string("block_10_ffn_rmsnorm_normalized")]; + tensor block_10_ffn_rmsnorm_y_0 = const()[name = string("block_10_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408780480)))]; + tensor block_10_ffn_rmsnorm = mul(x = block_10_ffn_rmsnorm_normalized, y = block_10_ffn_rmsnorm_y_0)[name = string("block_10_ffn_rmsnorm")]; + tensor block_10_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408782336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412051008))))[name = string("block_10_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_47 = constexpr_blockwise_shift_scale(data = block_10_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412206720))))[name = string("constexpr_blockwise_shift_scale_47")]; + tensor block_10_ffn_inproj_strides_0 = const()[name = string("block_10_ffn_inproj_strides_0"), val = tensor([1])]; + string block_10_ffn_inproj_pad_type_0 = const()[name = string("block_10_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_10_ffn_inproj_pad_0 = const()[name = string("block_10_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_10_ffn_inproj_dilations_0 = const()[name = string("block_10_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_10_ffn_inproj_groups_0 = const()[name = string("block_10_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_10_ffn_inproj = conv(dilations = block_10_ffn_inproj_dilations_0, groups = block_10_ffn_inproj_groups_0, pad = block_10_ffn_inproj_pad_0, pad_type = block_10_ffn_inproj_pad_type_0, strides = block_10_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_47, x = block_10_ffn_rmsnorm)[name = string("block_10_ffn_inproj")]; + tensor block_10_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412216512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415485184))))[name = string("block_10_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_48 = constexpr_blockwise_shift_scale(data = block_10_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415640896))))[name = string("constexpr_blockwise_shift_scale_48")]; + tensor block_10_ffn_g_strides_0 = const()[name = string("block_10_ffn_g_strides_0"), val = tensor([1])]; + string block_10_ffn_g_pad_type_0 = const()[name = string("block_10_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_10_ffn_g_pad_0 = const()[name = string("block_10_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_10_ffn_g_dilations_0 = const()[name = string("block_10_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_10_ffn_g_groups_0 = const()[name = string("block_10_ffn_g_groups_0"), val = int32(1)]; + tensor block_10_ffn_g = conv(dilations = block_10_ffn_g_dilations_0, groups = block_10_ffn_g_groups_0, pad = block_10_ffn_g_pad_0, pad_type = block_10_ffn_g_pad_type_0, strides = block_10_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_48, x = block_10_ffn_rmsnorm)[name = string("block_10_ffn_g")]; + tensor block_10_ffn_g_activation = silu(x = block_10_ffn_g)[name = string("block_10_ffn_g_activation")]; + tensor block_10_ffn_x_gated = mul(x = block_10_ffn_inproj, y = block_10_ffn_g_activation)[name = string("block_10_ffn_x_gated")]; + tensor block_10_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415650688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418919360))))[name = string("block_10_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_49 = constexpr_blockwise_shift_scale(data = block_10_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418948096))))[name = string("constexpr_blockwise_shift_scale_49")]; + tensor block_10_ffn_outproj_strides_0 = const()[name = string("block_10_ffn_outproj_strides_0"), val = tensor([1])]; + string block_10_ffn_outproj_pad_type_0 = const()[name = string("block_10_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_10_ffn_outproj_pad_0 = const()[name = string("block_10_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_10_ffn_outproj_dilations_0 = const()[name = string("block_10_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_10_ffn_outproj_groups_0 = const()[name = string("block_10_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_10_ffn_outproj = conv(dilations = block_10_ffn_outproj_dilations_0, groups = block_10_ffn_outproj_groups_0, pad = block_10_ffn_outproj_pad_0, pad_type = block_10_ffn_outproj_pad_type_0, strides = block_10_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_49, x = block_10_ffn_x_gated)[name = string("block_10_ffn_outproj")]; + tensor block_10_residual_2 = add(x = block_10_ffn_outproj, y = block_10_residual_1)[name = string("block_10_residual_2")]; + tensor block_11_attention_rmsnorm_abs = abs(x = block_10_residual_2)[name = string("block_11_attention_rmsnorm_abs")]; + tensor block_11_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_11_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_11_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_11_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_11_attention_rmsnorm_maxval = reduce_max(axes = block_11_attention_rmsnorm_maxval_axes_0, keep_dims = block_11_attention_rmsnorm_maxval_keep_dims_0, x = block_11_attention_rmsnorm_abs)[name = string("block_11_attention_rmsnorm_maxval")]; + fp16 block_11_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_11_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_11_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_11_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_11_attention_rmsnorm_maxval_clipped = clip(alpha = block_11_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_11_attention_rmsnorm_maxval_clipped_beta_0, x = block_11_attention_rmsnorm_maxval)[name = string("block_11_attention_rmsnorm_maxval_clipped")]; + tensor block_11_attention_rmsnorm_scaled = real_div(x = block_10_residual_2, y = block_11_attention_rmsnorm_maxval_clipped)[name = string("block_11_attention_rmsnorm_scaled")]; + tensor block_11_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_11_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_11_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_11_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_11_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_11_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_11_attention_rmsnorm_squared_sum_keep_dims_0, x = block_11_attention_rmsnorm_scaled)[name = string("block_11_attention_rmsnorm_squared_sum")]; + fp16 block_11_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_11_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_11_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_11_attention_rmsnorm_rsqrt_epsilon_0, x = block_11_attention_rmsnorm_squared_sum)[name = string("block_11_attention_rmsnorm_rsqrt")]; + fp16 block_11_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_11_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_11_attention_rmsnorm_dim_scaled = mul(x = block_11_attention_rmsnorm_scaled, y = block_11_attention_rmsnorm_dim_scaled_y_0)[name = string("block_11_attention_rmsnorm_dim_scaled")]; + tensor block_11_attention_rmsnorm_normalized = mul(x = block_11_attention_rmsnorm_dim_scaled, y = block_11_attention_rmsnorm_rsqrt)[name = string("block_11_attention_rmsnorm_normalized")]; + tensor block_11_attention_rmsnorm_y_0 = const()[name = string("block_11_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418949952)))]; + tensor block_11_attention_rmsnorm = mul(x = block_11_attention_rmsnorm_normalized, y = block_11_attention_rmsnorm_y_0)[name = string("block_11_attention_rmsnorm")]; + tensor attention_11_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418951808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419726016))))[name = string("attention_11_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_50 = constexpr_blockwise_shift_scale(data = attention_11_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419762944))))[name = string("constexpr_blockwise_shift_scale_50")]; + tensor attention_11_qkvproj_bias_0 = const()[name = string("attention_11_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419765312)))]; + tensor attention_11_qkvproj_strides_0 = const()[name = string("attention_11_qkvproj_strides_0"), val = tensor([1])]; + string attention_11_qkvproj_pad_type_0 = const()[name = string("attention_11_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_11_qkvproj_pad_0 = const()[name = string("attention_11_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_11_qkvproj_dilations_0 = const()[name = string("attention_11_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_11_qkvproj_groups_0 = const()[name = string("attention_11_qkvproj_groups_0"), val = int32(1)]; + tensor attention_11_qkvproj = conv(bias = attention_11_qkvproj_bias_0, dilations = attention_11_qkvproj_dilations_0, groups = attention_11_qkvproj_groups_0, pad = attention_11_qkvproj_pad_0, pad_type = attention_11_qkvproj_pad_type_0, strides = attention_11_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_50, x = block_11_attention_rmsnorm)[name = string("attention_11_qkvproj")]; + tensor attention_11_head_reshape_shape_0 = const()[name = string("attention_11_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_11_head_reshape = reshape(shape = attention_11_head_reshape_shape_0, x = attention_11_qkvproj)[name = string("attention_11_head_reshape")]; + tensor attention_11_head_transpose_perm_0 = const()[name = string("attention_11_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_11_split_qkv_heads_axis_0 = const()[name = string("attention_11_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_11_split_qkv_heads_split_sizes_0 = const()[name = string("attention_11_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_11_head_transpose = transpose(perm = attention_11_head_transpose_perm_0, x = attention_11_head_reshape)[name = string("transpose_26")]; + tensor attention_11_split_qkv_heads_0, tensor attention_11_split_qkv_heads_1, tensor attention_11_split_qkv_heads_2 = split(axis = attention_11_split_qkv_heads_axis_0, split_sizes = attention_11_split_qkv_heads_split_sizes_0, x = attention_11_head_transpose)[name = string("attention_11_split_qkv_heads")]; + tensor attention_11_q_rope_lhs_mult = mul(x = attention_11_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_11_q_rope_lhs_mult")]; + int32 attention_11_q_rotate_half_split_num_splits_0 = const()[name = string("attention_11_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_11_q_rotate_half_split_axis_0 = const()[name = string("attention_11_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_11_q_rotate_half_split_0, tensor attention_11_q_rotate_half_split_1 = split(axis = attention_11_q_rotate_half_split_axis_0, num_splits = attention_11_q_rotate_half_split_num_splits_0, x = attention_11_split_qkv_heads_0)[name = string("attention_11_q_rotate_half_split")]; + fp16 attention_11_q_rotate_half_neg_y_0 = const()[name = string("attention_11_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_11_q_rotate_half_neg = mul(x = attention_11_q_rotate_half_split_1, y = attention_11_q_rotate_half_neg_y_0)[name = string("attention_11_q_rotate_half_neg")]; + int32 attention_11_q_rotate_half_concat_axis_0 = const()[name = string("attention_11_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_11_q_rotate_half_concat_interleave_0 = const()[name = string("attention_11_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_11_q_rotate_half_concat = concat(axis = attention_11_q_rotate_half_concat_axis_0, interleave = attention_11_q_rotate_half_concat_interleave_0, values = (attention_11_q_rotate_half_neg, attention_11_q_rotate_half_split_0))[name = string("attention_11_q_rotate_half_concat")]; + tensor attention_11_q_rope_rhs_mult = mul(x = attention_11_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_11_q_rope_rhs_mult")]; + tensor attention_11_q_rope = add(x = attention_11_q_rope_lhs_mult, y = attention_11_q_rope_rhs_mult)[name = string("attention_11_q_rope")]; + tensor attention_11_k_rope_lhs_mult = mul(x = attention_11_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_11_k_rope_lhs_mult")]; + int32 attention_11_k_rotate_half_split_num_splits_0 = const()[name = string("attention_11_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_11_k_rotate_half_split_axis_0 = const()[name = string("attention_11_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_11_k_rotate_half_split_0, tensor attention_11_k_rotate_half_split_1 = split(axis = attention_11_k_rotate_half_split_axis_0, num_splits = attention_11_k_rotate_half_split_num_splits_0, x = attention_11_split_qkv_heads_1)[name = string("attention_11_k_rotate_half_split")]; + fp16 attention_11_k_rotate_half_neg_y_0 = const()[name = string("attention_11_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_11_k_rotate_half_neg = mul(x = attention_11_k_rotate_half_split_1, y = attention_11_k_rotate_half_neg_y_0)[name = string("attention_11_k_rotate_half_neg")]; + int32 attention_11_k_rotate_half_concat_axis_0 = const()[name = string("attention_11_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_11_k_rotate_half_concat_interleave_0 = const()[name = string("attention_11_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_11_k_rotate_half_concat = concat(axis = attention_11_k_rotate_half_concat_axis_0, interleave = attention_11_k_rotate_half_concat_interleave_0, values = (attention_11_k_rotate_half_neg, attention_11_k_rotate_half_split_0))[name = string("attention_11_k_rotate_half_concat")]; + tensor attention_11_k_rope_rhs_mult = mul(x = attention_11_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_11_k_rope_rhs_mult")]; + tensor attention_11_k_rope = add(x = attention_11_k_rope_lhs_mult, y = attention_11_k_rope_rhs_mult)[name = string("attention_11_k_rope")]; + int32 attention_11_q_splits_axis_0 = const()[name = string("attention_11_q_splits_axis_0"), val = int32(1)]; + int32 attention_11_q_splits_num_splits_0 = const()[name = string("attention_11_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_11_q_splits_0, tensor attention_11_q_splits_1 = split(axis = attention_11_q_splits_axis_0, num_splits = attention_11_q_splits_num_splits_0, x = attention_11_q_rope)[name = string("attention_11_q_splits")]; + tensor attention_11_update_begin_0_values0_0 = const()[name = string("attention_11_update_begin_0_values0_0"), val = tensor([11])]; + tensor attention_11_update_begin_0_values1_0 = const()[name = string("attention_11_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_11_update_begin_0_values3_0 = const()[name = string("attention_11_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_11_update_begin_0_axis_0 = const()[name = string("attention_11_update_begin_0_axis_0"), val = int32(0)]; + bool attention_11_update_begin_0_interleave_0 = const()[name = string("attention_11_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_11_update_begin_0 = concat(axis = attention_11_update_begin_0_axis_0, interleave = attention_11_update_begin_0_interleave_0, values = (attention_11_update_begin_0_values0_0, attention_11_update_begin_0_values1_0, query_pos1, attention_11_update_begin_0_values3_0))[name = string("attention_11_update_begin_0")]; + tensor attention_11_update_end_0_values0_0 = const()[name = string("attention_11_update_end_0_values0_0"), val = tensor([12])]; + tensor attention_11_update_end_0_values1_0 = const()[name = string("attention_11_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_11_update_end_0_values3_0 = const()[name = string("attention_11_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_11_update_end_0_axis_0 = const()[name = string("attention_11_update_end_0_axis_0"), val = int32(0)]; + bool attention_11_update_end_0_interleave_0 = const()[name = string("attention_11_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_11_update_end_0 = concat(axis = attention_11_update_end_0_axis_0, interleave = attention_11_update_end_0_interleave_0, values = (attention_11_update_end_0_values0_0, attention_11_update_end_0_values1_0, end_pos_0, attention_11_update_end_0_values3_0))[name = string("attention_11_update_end_0")]; + tensor attention_11_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_11_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_11_updated_key_cache_0 = slice_update(begin = attention_11_update_begin_0, end = attention_11_update_end_0, squeeze_mask = attention_11_updated_key_cache_0_squeeze_mask_0, update = attention_11_k_rope, x = coreml_update_state_20)[name = string("attention_11_updated_key_cache_0")]; + write_state(data = attention_11_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_22_write_state")]; + tensor coreml_update_state_22 = read_state(input = key_cache_state)[name = string("coreml_update_state_22")]; + tensor attention_11_key_cache_begin_0 = const()[name = string("attention_11_key_cache_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor attention_11_key_cache_end_0 = const()[name = string("attention_11_key_cache_end_0"), val = tensor([12, 2, 512, 64])]; + tensor attention_11_key_cache_squeeze_mask_0 = const()[name = string("attention_11_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_11_key_cache = slice_by_index(begin = attention_11_key_cache_begin_0, end = attention_11_key_cache_end_0, squeeze_mask = attention_11_key_cache_squeeze_mask_0, x = coreml_update_state_22)[name = string("attention_11_key_cache")]; + int32 attention_11_key_cache_head_axis_0 = const()[name = string("attention_11_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_11_key_cache_head_num_splits_0 = const()[name = string("attention_11_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_11_key_cache_head_0, tensor attention_11_key_cache_head_1 = split(axis = attention_11_key_cache_head_axis_0, num_splits = attention_11_key_cache_head_num_splits_0, x = attention_11_key_cache)[name = string("attention_11_key_cache_head")]; + tensor attention_11_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_11_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_11_updated_value_cache_0 = slice_update(begin = attention_11_update_begin_0, end = attention_11_update_end_0, squeeze_mask = attention_11_updated_value_cache_0_squeeze_mask_0, update = attention_11_split_qkv_heads_2, x = coreml_update_state_21)[name = string("attention_11_updated_value_cache_0")]; + write_state(data = attention_11_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_23_write_state")]; + tensor coreml_update_state_23 = read_state(input = value_cache_state)[name = string("coreml_update_state_23")]; + tensor attention_11_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_11_slice_current_layer_value_cache_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor attention_11_slice_current_layer_value_cache_end_0 = const()[name = string("attention_11_slice_current_layer_value_cache_end_0"), val = tensor([12, 2, 512, 64])]; + tensor attention_11_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_11_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_11_slice_current_layer_value_cache = slice_by_index(begin = attention_11_slice_current_layer_value_cache_begin_0, end = attention_11_slice_current_layer_value_cache_end_0, squeeze_mask = attention_11_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_23)[name = string("attention_11_slice_current_layer_value_cache")]; + int32 attention_11_slice_value_cache_heads_axis_0 = const()[name = string("attention_11_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_11_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_11_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_11_slice_value_cache_heads_0, tensor attention_11_slice_value_cache_heads_1 = split(axis = attention_11_slice_value_cache_heads_axis_0, num_splits = attention_11_slice_value_cache_heads_num_splits_0, x = attention_11_slice_current_layer_value_cache)[name = string("attention_11_slice_value_cache_heads")]; + bool attention_11_scores_0_transpose_y_0 = const()[name = string("attention_11_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_11_scores_0_transpose_x_0 = const()[name = string("attention_11_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_11_scores_0 = matmul(transpose_x = attention_11_scores_0_transpose_x_0, transpose_y = attention_11_scores_0_transpose_y_0, x = attention_11_key_cache_head_0, y = attention_11_q_splits_0)[name = string("attention_11_scores_0")]; + fp16 attention_11_scaled_scores_0_y_0 = const()[name = string("attention_11_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_11_scaled_scores_0 = mul(x = attention_11_scores_0, y = attention_11_scaled_scores_0_y_0)[name = string("attention_11_scaled_scores_0")]; + tensor attention_11_masked_scaled_scores_0 = add(x = attention_11_scaled_scores_0, y = transpose_0)[name = string("attention_11_masked_scaled_scores_0")]; + int32 softmax_22_axis_0 = const()[name = string("softmax_22_axis_0"), val = int32(-2)]; + tensor softmax_22 = softmax(axis = softmax_22_axis_0, x = attention_11_masked_scaled_scores_0)[name = string("softmax_22")]; + bool attention_11_attention_0_transpose_x_0 = const()[name = string("attention_11_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_11_attention_0_transpose_y_0 = const()[name = string("attention_11_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_11_attention_0 = matmul(transpose_x = attention_11_attention_0_transpose_x_0, transpose_y = attention_11_attention_0_transpose_y_0, x = softmax_22, y = attention_11_slice_value_cache_heads_0)[name = string("attention_11_attention_0")]; + bool attention_11_scores_1_transpose_y_0 = const()[name = string("attention_11_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_11_scores_1_transpose_x_0 = const()[name = string("attention_11_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_11_scores_1 = matmul(transpose_x = attention_11_scores_1_transpose_x_0, transpose_y = attention_11_scores_1_transpose_y_0, x = attention_11_key_cache_head_1, y = attention_11_q_splits_1)[name = string("attention_11_scores_1")]; + fp16 attention_11_scaled_scores_1_y_0 = const()[name = string("attention_11_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_11_scaled_scores_1 = mul(x = attention_11_scores_1, y = attention_11_scaled_scores_1_y_0)[name = string("attention_11_scaled_scores_1")]; + tensor attention_11_masked_scaled_scores_1 = add(x = attention_11_scaled_scores_1, y = transpose_0)[name = string("attention_11_masked_scaled_scores_1")]; + int32 softmax_23_axis_0 = const()[name = string("softmax_23_axis_0"), val = int32(-2)]; + tensor softmax_23 = softmax(axis = softmax_23_axis_0, x = attention_11_masked_scaled_scores_1)[name = string("softmax_23")]; + bool attention_11_attention_1_transpose_x_0 = const()[name = string("attention_11_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_11_attention_1_transpose_y_0 = const()[name = string("attention_11_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_11_attention_1 = matmul(transpose_x = attention_11_attention_1_transpose_x_0, transpose_y = attention_11_attention_1_transpose_y_0, x = softmax_23, y = attention_11_slice_value_cache_heads_1)[name = string("attention_11_attention_1")]; + int32 attention_11_concat_attention_all_heads_axis_0 = const()[name = string("attention_11_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_11_concat_attention_all_heads_interleave_0 = const()[name = string("attention_11_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_11_concat_attention_all_heads = concat(axis = attention_11_concat_attention_all_heads_axis_0, interleave = attention_11_concat_attention_all_heads_interleave_0, values = (attention_11_attention_0, attention_11_attention_1))[name = string("attention_11_concat_attention_all_heads")]; + tensor attention_11_channels_first_retransposed_perm_0 = const()[name = string("attention_11_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_11_reshaped_shape_0 = const()[name = string("attention_11_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_11_channels_first_retransposed = transpose(perm = attention_11_channels_first_retransposed_perm_0, x = attention_11_concat_attention_all_heads)[name = string("transpose_25")]; + tensor attention_11_reshaped = reshape(shape = attention_11_reshaped_shape_0, x = attention_11_channels_first_retransposed)[name = string("attention_11_reshaped")]; + tensor attention_11_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419767680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420369856))))[name = string("attention_11_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_51 = constexpr_blockwise_shift_scale(data = attention_11_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420398592))))[name = string("constexpr_blockwise_shift_scale_51")]; + tensor attention_11_outproj_strides_0 = const()[name = string("attention_11_outproj_strides_0"), val = tensor([1])]; + string attention_11_outproj_pad_type_0 = const()[name = string("attention_11_outproj_pad_type_0"), val = string("valid")]; + tensor attention_11_outproj_pad_0 = const()[name = string("attention_11_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_11_outproj_dilations_0 = const()[name = string("attention_11_outproj_dilations_0"), val = tensor([1])]; + int32 attention_11_outproj_groups_0 = const()[name = string("attention_11_outproj_groups_0"), val = int32(1)]; + tensor attention_11_outproj = conv(dilations = attention_11_outproj_dilations_0, groups = attention_11_outproj_groups_0, pad = attention_11_outproj_pad_0, pad_type = attention_11_outproj_pad_type_0, strides = attention_11_outproj_strides_0, weight = constexpr_blockwise_shift_scale_51, x = attention_11_reshaped)[name = string("attention_11_outproj")]; + tensor block_11_residual_1 = add(x = block_10_residual_2, y = attention_11_outproj)[name = string("block_11_residual_1")]; + tensor block_11_ffn_rmsnorm_abs = abs(x = block_11_residual_1)[name = string("block_11_ffn_rmsnorm_abs")]; + tensor block_11_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_11_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_11_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_11_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_11_ffn_rmsnorm_maxval = reduce_max(axes = block_11_ffn_rmsnorm_maxval_axes_0, keep_dims = block_11_ffn_rmsnorm_maxval_keep_dims_0, x = block_11_ffn_rmsnorm_abs)[name = string("block_11_ffn_rmsnorm_maxval")]; + fp16 block_11_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_11_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_11_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_11_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_11_ffn_rmsnorm_maxval_clipped = clip(alpha = block_11_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_11_ffn_rmsnorm_maxval_clipped_beta_0, x = block_11_ffn_rmsnorm_maxval)[name = string("block_11_ffn_rmsnorm_maxval_clipped")]; + tensor block_11_ffn_rmsnorm_scaled = real_div(x = block_11_residual_1, y = block_11_ffn_rmsnorm_maxval_clipped)[name = string("block_11_ffn_rmsnorm_scaled")]; + tensor block_11_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_11_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_11_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_11_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_11_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_11_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_11_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_11_ffn_rmsnorm_scaled)[name = string("block_11_ffn_rmsnorm_squared_sum")]; + fp16 block_11_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_11_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_11_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_11_ffn_rmsnorm_rsqrt_epsilon_0, x = block_11_ffn_rmsnorm_squared_sum)[name = string("block_11_ffn_rmsnorm_rsqrt")]; + fp16 block_11_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_11_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_11_ffn_rmsnorm_dim_scaled = mul(x = block_11_ffn_rmsnorm_scaled, y = block_11_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_11_ffn_rmsnorm_dim_scaled")]; + tensor block_11_ffn_rmsnorm_normalized = mul(x = block_11_ffn_rmsnorm_dim_scaled, y = block_11_ffn_rmsnorm_rsqrt)[name = string("block_11_ffn_rmsnorm_normalized")]; + tensor block_11_ffn_rmsnorm_y_0 = const()[name = string("block_11_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420400448)))]; + tensor block_11_ffn_rmsnorm = mul(x = block_11_ffn_rmsnorm_normalized, y = block_11_ffn_rmsnorm_y_0)[name = string("block_11_ffn_rmsnorm")]; + tensor block_11_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420402304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423670976))))[name = string("block_11_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_52 = constexpr_blockwise_shift_scale(data = block_11_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423826688))))[name = string("constexpr_blockwise_shift_scale_52")]; + tensor block_11_ffn_inproj_strides_0 = const()[name = string("block_11_ffn_inproj_strides_0"), val = tensor([1])]; + string block_11_ffn_inproj_pad_type_0 = const()[name = string("block_11_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_11_ffn_inproj_pad_0 = const()[name = string("block_11_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_11_ffn_inproj_dilations_0 = const()[name = string("block_11_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_11_ffn_inproj_groups_0 = const()[name = string("block_11_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_11_ffn_inproj = conv(dilations = block_11_ffn_inproj_dilations_0, groups = block_11_ffn_inproj_groups_0, pad = block_11_ffn_inproj_pad_0, pad_type = block_11_ffn_inproj_pad_type_0, strides = block_11_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_52, x = block_11_ffn_rmsnorm)[name = string("block_11_ffn_inproj")]; + tensor block_11_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423836480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427105152))))[name = string("block_11_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_53 = constexpr_blockwise_shift_scale(data = block_11_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427260864))))[name = string("constexpr_blockwise_shift_scale_53")]; + tensor block_11_ffn_g_strides_0 = const()[name = string("block_11_ffn_g_strides_0"), val = tensor([1])]; + string block_11_ffn_g_pad_type_0 = const()[name = string("block_11_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_11_ffn_g_pad_0 = const()[name = string("block_11_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_11_ffn_g_dilations_0 = const()[name = string("block_11_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_11_ffn_g_groups_0 = const()[name = string("block_11_ffn_g_groups_0"), val = int32(1)]; + tensor block_11_ffn_g = conv(dilations = block_11_ffn_g_dilations_0, groups = block_11_ffn_g_groups_0, pad = block_11_ffn_g_pad_0, pad_type = block_11_ffn_g_pad_type_0, strides = block_11_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_53, x = block_11_ffn_rmsnorm)[name = string("block_11_ffn_g")]; + tensor block_11_ffn_g_activation = silu(x = block_11_ffn_g)[name = string("block_11_ffn_g_activation")]; + tensor block_11_ffn_x_gated = mul(x = block_11_ffn_inproj, y = block_11_ffn_g_activation)[name = string("block_11_ffn_x_gated")]; + tensor block_11_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427270656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430539328))))[name = string("block_11_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_54 = constexpr_blockwise_shift_scale(data = block_11_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430568064))))[name = string("constexpr_blockwise_shift_scale_54")]; + tensor block_11_ffn_outproj_strides_0 = const()[name = string("block_11_ffn_outproj_strides_0"), val = tensor([1])]; + string block_11_ffn_outproj_pad_type_0 = const()[name = string("block_11_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_11_ffn_outproj_pad_0 = const()[name = string("block_11_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_11_ffn_outproj_dilations_0 = const()[name = string("block_11_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_11_ffn_outproj_groups_0 = const()[name = string("block_11_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_11_ffn_outproj = conv(dilations = block_11_ffn_outproj_dilations_0, groups = block_11_ffn_outproj_groups_0, pad = block_11_ffn_outproj_pad_0, pad_type = block_11_ffn_outproj_pad_type_0, strides = block_11_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_54, x = block_11_ffn_x_gated)[name = string("block_11_ffn_outproj")]; + tensor block_11_residual_2 = add(x = block_11_ffn_outproj, y = block_11_residual_1)[name = string("block_11_residual_2")]; + tensor block_12_attention_rmsnorm_abs = abs(x = block_11_residual_2)[name = string("block_12_attention_rmsnorm_abs")]; + tensor block_12_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_12_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_12_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_12_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_12_attention_rmsnorm_maxval = reduce_max(axes = block_12_attention_rmsnorm_maxval_axes_0, keep_dims = block_12_attention_rmsnorm_maxval_keep_dims_0, x = block_12_attention_rmsnorm_abs)[name = string("block_12_attention_rmsnorm_maxval")]; + fp16 block_12_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_12_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_12_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_12_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_12_attention_rmsnorm_maxval_clipped = clip(alpha = block_12_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_12_attention_rmsnorm_maxval_clipped_beta_0, x = block_12_attention_rmsnorm_maxval)[name = string("block_12_attention_rmsnorm_maxval_clipped")]; + tensor block_12_attention_rmsnorm_scaled = real_div(x = block_11_residual_2, y = block_12_attention_rmsnorm_maxval_clipped)[name = string("block_12_attention_rmsnorm_scaled")]; + tensor block_12_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_12_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_12_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_12_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_12_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_12_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_12_attention_rmsnorm_squared_sum_keep_dims_0, x = block_12_attention_rmsnorm_scaled)[name = string("block_12_attention_rmsnorm_squared_sum")]; + fp16 block_12_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_12_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_12_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_12_attention_rmsnorm_rsqrt_epsilon_0, x = block_12_attention_rmsnorm_squared_sum)[name = string("block_12_attention_rmsnorm_rsqrt")]; + fp16 block_12_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_12_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_12_attention_rmsnorm_dim_scaled = mul(x = block_12_attention_rmsnorm_scaled, y = block_12_attention_rmsnorm_dim_scaled_y_0)[name = string("block_12_attention_rmsnorm_dim_scaled")]; + tensor block_12_attention_rmsnorm_normalized = mul(x = block_12_attention_rmsnorm_dim_scaled, y = block_12_attention_rmsnorm_rsqrt)[name = string("block_12_attention_rmsnorm_normalized")]; + tensor block_12_attention_rmsnorm_y_0 = const()[name = string("block_12_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430569920)))]; + tensor block_12_attention_rmsnorm = mul(x = block_12_attention_rmsnorm_normalized, y = block_12_attention_rmsnorm_y_0)[name = string("block_12_attention_rmsnorm")]; + tensor attention_12_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430571776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431345984))))[name = string("attention_12_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_55 = constexpr_blockwise_shift_scale(data = attention_12_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431382912))))[name = string("constexpr_blockwise_shift_scale_55")]; + tensor attention_12_qkvproj_bias_0 = const()[name = string("attention_12_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431385280)))]; + tensor attention_12_qkvproj_strides_0 = const()[name = string("attention_12_qkvproj_strides_0"), val = tensor([1])]; + string attention_12_qkvproj_pad_type_0 = const()[name = string("attention_12_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_12_qkvproj_pad_0 = const()[name = string("attention_12_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_12_qkvproj_dilations_0 = const()[name = string("attention_12_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_12_qkvproj_groups_0 = const()[name = string("attention_12_qkvproj_groups_0"), val = int32(1)]; + tensor attention_12_qkvproj = conv(bias = attention_12_qkvproj_bias_0, dilations = attention_12_qkvproj_dilations_0, groups = attention_12_qkvproj_groups_0, pad = attention_12_qkvproj_pad_0, pad_type = attention_12_qkvproj_pad_type_0, strides = attention_12_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_55, x = block_12_attention_rmsnorm)[name = string("attention_12_qkvproj")]; + tensor attention_12_head_reshape_shape_0 = const()[name = string("attention_12_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_12_head_reshape = reshape(shape = attention_12_head_reshape_shape_0, x = attention_12_qkvproj)[name = string("attention_12_head_reshape")]; + tensor attention_12_head_transpose_perm_0 = const()[name = string("attention_12_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_12_split_qkv_heads_axis_0 = const()[name = string("attention_12_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_12_split_qkv_heads_split_sizes_0 = const()[name = string("attention_12_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_12_head_transpose = transpose(perm = attention_12_head_transpose_perm_0, x = attention_12_head_reshape)[name = string("transpose_24")]; + tensor attention_12_split_qkv_heads_0, tensor attention_12_split_qkv_heads_1, tensor attention_12_split_qkv_heads_2 = split(axis = attention_12_split_qkv_heads_axis_0, split_sizes = attention_12_split_qkv_heads_split_sizes_0, x = attention_12_head_transpose)[name = string("attention_12_split_qkv_heads")]; + tensor attention_12_q_rope_lhs_mult = mul(x = attention_12_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_12_q_rope_lhs_mult")]; + int32 attention_12_q_rotate_half_split_num_splits_0 = const()[name = string("attention_12_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_12_q_rotate_half_split_axis_0 = const()[name = string("attention_12_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_12_q_rotate_half_split_0, tensor attention_12_q_rotate_half_split_1 = split(axis = attention_12_q_rotate_half_split_axis_0, num_splits = attention_12_q_rotate_half_split_num_splits_0, x = attention_12_split_qkv_heads_0)[name = string("attention_12_q_rotate_half_split")]; + fp16 attention_12_q_rotate_half_neg_y_0 = const()[name = string("attention_12_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_12_q_rotate_half_neg = mul(x = attention_12_q_rotate_half_split_1, y = attention_12_q_rotate_half_neg_y_0)[name = string("attention_12_q_rotate_half_neg")]; + int32 attention_12_q_rotate_half_concat_axis_0 = const()[name = string("attention_12_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_12_q_rotate_half_concat_interleave_0 = const()[name = string("attention_12_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_12_q_rotate_half_concat = concat(axis = attention_12_q_rotate_half_concat_axis_0, interleave = attention_12_q_rotate_half_concat_interleave_0, values = (attention_12_q_rotate_half_neg, attention_12_q_rotate_half_split_0))[name = string("attention_12_q_rotate_half_concat")]; + tensor attention_12_q_rope_rhs_mult = mul(x = attention_12_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_12_q_rope_rhs_mult")]; + tensor attention_12_q_rope = add(x = attention_12_q_rope_lhs_mult, y = attention_12_q_rope_rhs_mult)[name = string("attention_12_q_rope")]; + tensor attention_12_k_rope_lhs_mult = mul(x = attention_12_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_12_k_rope_lhs_mult")]; + int32 attention_12_k_rotate_half_split_num_splits_0 = const()[name = string("attention_12_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_12_k_rotate_half_split_axis_0 = const()[name = string("attention_12_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_12_k_rotate_half_split_0, tensor attention_12_k_rotate_half_split_1 = split(axis = attention_12_k_rotate_half_split_axis_0, num_splits = attention_12_k_rotate_half_split_num_splits_0, x = attention_12_split_qkv_heads_1)[name = string("attention_12_k_rotate_half_split")]; + fp16 attention_12_k_rotate_half_neg_y_0 = const()[name = string("attention_12_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_12_k_rotate_half_neg = mul(x = attention_12_k_rotate_half_split_1, y = attention_12_k_rotate_half_neg_y_0)[name = string("attention_12_k_rotate_half_neg")]; + int32 attention_12_k_rotate_half_concat_axis_0 = const()[name = string("attention_12_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_12_k_rotate_half_concat_interleave_0 = const()[name = string("attention_12_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_12_k_rotate_half_concat = concat(axis = attention_12_k_rotate_half_concat_axis_0, interleave = attention_12_k_rotate_half_concat_interleave_0, values = (attention_12_k_rotate_half_neg, attention_12_k_rotate_half_split_0))[name = string("attention_12_k_rotate_half_concat")]; + tensor attention_12_k_rope_rhs_mult = mul(x = attention_12_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_12_k_rope_rhs_mult")]; + tensor attention_12_k_rope = add(x = attention_12_k_rope_lhs_mult, y = attention_12_k_rope_rhs_mult)[name = string("attention_12_k_rope")]; + int32 attention_12_q_splits_axis_0 = const()[name = string("attention_12_q_splits_axis_0"), val = int32(1)]; + int32 attention_12_q_splits_num_splits_0 = const()[name = string("attention_12_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_12_q_splits_0, tensor attention_12_q_splits_1 = split(axis = attention_12_q_splits_axis_0, num_splits = attention_12_q_splits_num_splits_0, x = attention_12_q_rope)[name = string("attention_12_q_splits")]; + tensor attention_12_update_begin_0_values0_0 = const()[name = string("attention_12_update_begin_0_values0_0"), val = tensor([12])]; + tensor attention_12_update_begin_0_values1_0 = const()[name = string("attention_12_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_12_update_begin_0_values3_0 = const()[name = string("attention_12_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_12_update_begin_0_axis_0 = const()[name = string("attention_12_update_begin_0_axis_0"), val = int32(0)]; + bool attention_12_update_begin_0_interleave_0 = const()[name = string("attention_12_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_12_update_begin_0 = concat(axis = attention_12_update_begin_0_axis_0, interleave = attention_12_update_begin_0_interleave_0, values = (attention_12_update_begin_0_values0_0, attention_12_update_begin_0_values1_0, query_pos1, attention_12_update_begin_0_values3_0))[name = string("attention_12_update_begin_0")]; + tensor attention_12_update_end_0_values0_0 = const()[name = string("attention_12_update_end_0_values0_0"), val = tensor([13])]; + tensor attention_12_update_end_0_values1_0 = const()[name = string("attention_12_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_12_update_end_0_values3_0 = const()[name = string("attention_12_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_12_update_end_0_axis_0 = const()[name = string("attention_12_update_end_0_axis_0"), val = int32(0)]; + bool attention_12_update_end_0_interleave_0 = const()[name = string("attention_12_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_12_update_end_0 = concat(axis = attention_12_update_end_0_axis_0, interleave = attention_12_update_end_0_interleave_0, values = (attention_12_update_end_0_values0_0, attention_12_update_end_0_values1_0, end_pos_0, attention_12_update_end_0_values3_0))[name = string("attention_12_update_end_0")]; + tensor attention_12_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_12_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_12_updated_key_cache_0 = slice_update(begin = attention_12_update_begin_0, end = attention_12_update_end_0, squeeze_mask = attention_12_updated_key_cache_0_squeeze_mask_0, update = attention_12_k_rope, x = coreml_update_state_22)[name = string("attention_12_updated_key_cache_0")]; + write_state(data = attention_12_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_24_write_state")]; + tensor coreml_update_state_24 = read_state(input = key_cache_state)[name = string("coreml_update_state_24")]; + tensor attention_12_key_cache_begin_0 = const()[name = string("attention_12_key_cache_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor attention_12_key_cache_end_0 = const()[name = string("attention_12_key_cache_end_0"), val = tensor([13, 2, 512, 64])]; + tensor attention_12_key_cache_squeeze_mask_0 = const()[name = string("attention_12_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_12_key_cache = slice_by_index(begin = attention_12_key_cache_begin_0, end = attention_12_key_cache_end_0, squeeze_mask = attention_12_key_cache_squeeze_mask_0, x = coreml_update_state_24)[name = string("attention_12_key_cache")]; + int32 attention_12_key_cache_head_axis_0 = const()[name = string("attention_12_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_12_key_cache_head_num_splits_0 = const()[name = string("attention_12_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_12_key_cache_head_0, tensor attention_12_key_cache_head_1 = split(axis = attention_12_key_cache_head_axis_0, num_splits = attention_12_key_cache_head_num_splits_0, x = attention_12_key_cache)[name = string("attention_12_key_cache_head")]; + tensor attention_12_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_12_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_12_updated_value_cache_0 = slice_update(begin = attention_12_update_begin_0, end = attention_12_update_end_0, squeeze_mask = attention_12_updated_value_cache_0_squeeze_mask_0, update = attention_12_split_qkv_heads_2, x = coreml_update_state_23)[name = string("attention_12_updated_value_cache_0")]; + write_state(data = attention_12_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_25_write_state")]; + tensor coreml_update_state_25 = read_state(input = value_cache_state)[name = string("coreml_update_state_25")]; + tensor attention_12_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_12_slice_current_layer_value_cache_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor attention_12_slice_current_layer_value_cache_end_0 = const()[name = string("attention_12_slice_current_layer_value_cache_end_0"), val = tensor([13, 2, 512, 64])]; + tensor attention_12_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_12_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_12_slice_current_layer_value_cache = slice_by_index(begin = attention_12_slice_current_layer_value_cache_begin_0, end = attention_12_slice_current_layer_value_cache_end_0, squeeze_mask = attention_12_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_25)[name = string("attention_12_slice_current_layer_value_cache")]; + int32 attention_12_slice_value_cache_heads_axis_0 = const()[name = string("attention_12_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_12_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_12_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_12_slice_value_cache_heads_0, tensor attention_12_slice_value_cache_heads_1 = split(axis = attention_12_slice_value_cache_heads_axis_0, num_splits = attention_12_slice_value_cache_heads_num_splits_0, x = attention_12_slice_current_layer_value_cache)[name = string("attention_12_slice_value_cache_heads")]; + bool attention_12_scores_0_transpose_y_0 = const()[name = string("attention_12_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_12_scores_0_transpose_x_0 = const()[name = string("attention_12_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_12_scores_0 = matmul(transpose_x = attention_12_scores_0_transpose_x_0, transpose_y = attention_12_scores_0_transpose_y_0, x = attention_12_key_cache_head_0, y = attention_12_q_splits_0)[name = string("attention_12_scores_0")]; + fp16 attention_12_scaled_scores_0_y_0 = const()[name = string("attention_12_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_12_scaled_scores_0 = mul(x = attention_12_scores_0, y = attention_12_scaled_scores_0_y_0)[name = string("attention_12_scaled_scores_0")]; + tensor attention_12_masked_scaled_scores_0 = add(x = attention_12_scaled_scores_0, y = transpose_0)[name = string("attention_12_masked_scaled_scores_0")]; + int32 softmax_24_axis_0 = const()[name = string("softmax_24_axis_0"), val = int32(-2)]; + tensor softmax_24 = softmax(axis = softmax_24_axis_0, x = attention_12_masked_scaled_scores_0)[name = string("softmax_24")]; + bool attention_12_attention_0_transpose_x_0 = const()[name = string("attention_12_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_12_attention_0_transpose_y_0 = const()[name = string("attention_12_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_12_attention_0 = matmul(transpose_x = attention_12_attention_0_transpose_x_0, transpose_y = attention_12_attention_0_transpose_y_0, x = softmax_24, y = attention_12_slice_value_cache_heads_0)[name = string("attention_12_attention_0")]; + bool attention_12_scores_1_transpose_y_0 = const()[name = string("attention_12_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_12_scores_1_transpose_x_0 = const()[name = string("attention_12_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_12_scores_1 = matmul(transpose_x = attention_12_scores_1_transpose_x_0, transpose_y = attention_12_scores_1_transpose_y_0, x = attention_12_key_cache_head_1, y = attention_12_q_splits_1)[name = string("attention_12_scores_1")]; + fp16 attention_12_scaled_scores_1_y_0 = const()[name = string("attention_12_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_12_scaled_scores_1 = mul(x = attention_12_scores_1, y = attention_12_scaled_scores_1_y_0)[name = string("attention_12_scaled_scores_1")]; + tensor attention_12_masked_scaled_scores_1 = add(x = attention_12_scaled_scores_1, y = transpose_0)[name = string("attention_12_masked_scaled_scores_1")]; + int32 softmax_25_axis_0 = const()[name = string("softmax_25_axis_0"), val = int32(-2)]; + tensor softmax_25 = softmax(axis = softmax_25_axis_0, x = attention_12_masked_scaled_scores_1)[name = string("softmax_25")]; + bool attention_12_attention_1_transpose_x_0 = const()[name = string("attention_12_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_12_attention_1_transpose_y_0 = const()[name = string("attention_12_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_12_attention_1 = matmul(transpose_x = attention_12_attention_1_transpose_x_0, transpose_y = attention_12_attention_1_transpose_y_0, x = softmax_25, y = attention_12_slice_value_cache_heads_1)[name = string("attention_12_attention_1")]; + int32 attention_12_concat_attention_all_heads_axis_0 = const()[name = string("attention_12_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_12_concat_attention_all_heads_interleave_0 = const()[name = string("attention_12_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_12_concat_attention_all_heads = concat(axis = attention_12_concat_attention_all_heads_axis_0, interleave = attention_12_concat_attention_all_heads_interleave_0, values = (attention_12_attention_0, attention_12_attention_1))[name = string("attention_12_concat_attention_all_heads")]; + tensor attention_12_channels_first_retransposed_perm_0 = const()[name = string("attention_12_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_12_reshaped_shape_0 = const()[name = string("attention_12_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_12_channels_first_retransposed = transpose(perm = attention_12_channels_first_retransposed_perm_0, x = attention_12_concat_attention_all_heads)[name = string("transpose_23")]; + tensor attention_12_reshaped = reshape(shape = attention_12_reshaped_shape_0, x = attention_12_channels_first_retransposed)[name = string("attention_12_reshaped")]; + tensor attention_12_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431387648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431989824))))[name = string("attention_12_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_56 = constexpr_blockwise_shift_scale(data = attention_12_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432018560))))[name = string("constexpr_blockwise_shift_scale_56")]; + tensor attention_12_outproj_strides_0 = const()[name = string("attention_12_outproj_strides_0"), val = tensor([1])]; + string attention_12_outproj_pad_type_0 = const()[name = string("attention_12_outproj_pad_type_0"), val = string("valid")]; + tensor attention_12_outproj_pad_0 = const()[name = string("attention_12_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_12_outproj_dilations_0 = const()[name = string("attention_12_outproj_dilations_0"), val = tensor([1])]; + int32 attention_12_outproj_groups_0 = const()[name = string("attention_12_outproj_groups_0"), val = int32(1)]; + tensor attention_12_outproj = conv(dilations = attention_12_outproj_dilations_0, groups = attention_12_outproj_groups_0, pad = attention_12_outproj_pad_0, pad_type = attention_12_outproj_pad_type_0, strides = attention_12_outproj_strides_0, weight = constexpr_blockwise_shift_scale_56, x = attention_12_reshaped)[name = string("attention_12_outproj")]; + tensor block_12_residual_1 = add(x = block_11_residual_2, y = attention_12_outproj)[name = string("block_12_residual_1")]; + tensor block_12_ffn_rmsnorm_abs = abs(x = block_12_residual_1)[name = string("block_12_ffn_rmsnorm_abs")]; + tensor block_12_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_12_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_12_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_12_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_12_ffn_rmsnorm_maxval = reduce_max(axes = block_12_ffn_rmsnorm_maxval_axes_0, keep_dims = block_12_ffn_rmsnorm_maxval_keep_dims_0, x = block_12_ffn_rmsnorm_abs)[name = string("block_12_ffn_rmsnorm_maxval")]; + fp16 block_12_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_12_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_12_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_12_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_12_ffn_rmsnorm_maxval_clipped = clip(alpha = block_12_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_12_ffn_rmsnorm_maxval_clipped_beta_0, x = block_12_ffn_rmsnorm_maxval)[name = string("block_12_ffn_rmsnorm_maxval_clipped")]; + tensor block_12_ffn_rmsnorm_scaled = real_div(x = block_12_residual_1, y = block_12_ffn_rmsnorm_maxval_clipped)[name = string("block_12_ffn_rmsnorm_scaled")]; + tensor block_12_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_12_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_12_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_12_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_12_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_12_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_12_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_12_ffn_rmsnorm_scaled)[name = string("block_12_ffn_rmsnorm_squared_sum")]; + fp16 block_12_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_12_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_12_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_12_ffn_rmsnorm_rsqrt_epsilon_0, x = block_12_ffn_rmsnorm_squared_sum)[name = string("block_12_ffn_rmsnorm_rsqrt")]; + fp16 block_12_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_12_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_12_ffn_rmsnorm_dim_scaled = mul(x = block_12_ffn_rmsnorm_scaled, y = block_12_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_12_ffn_rmsnorm_dim_scaled")]; + tensor block_12_ffn_rmsnorm_normalized = mul(x = block_12_ffn_rmsnorm_dim_scaled, y = block_12_ffn_rmsnorm_rsqrt)[name = string("block_12_ffn_rmsnorm_normalized")]; + tensor block_12_ffn_rmsnorm_y_0 = const()[name = string("block_12_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432020416)))]; + tensor block_12_ffn_rmsnorm = mul(x = block_12_ffn_rmsnorm_normalized, y = block_12_ffn_rmsnorm_y_0)[name = string("block_12_ffn_rmsnorm")]; + tensor block_12_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432022272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(435290944))))[name = string("block_12_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_57 = constexpr_blockwise_shift_scale(data = block_12_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(435446656))))[name = string("constexpr_blockwise_shift_scale_57")]; + tensor block_12_ffn_inproj_strides_0 = const()[name = string("block_12_ffn_inproj_strides_0"), val = tensor([1])]; + string block_12_ffn_inproj_pad_type_0 = const()[name = string("block_12_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_12_ffn_inproj_pad_0 = const()[name = string("block_12_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_12_ffn_inproj_dilations_0 = const()[name = string("block_12_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_12_ffn_inproj_groups_0 = const()[name = string("block_12_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_12_ffn_inproj = conv(dilations = block_12_ffn_inproj_dilations_0, groups = block_12_ffn_inproj_groups_0, pad = block_12_ffn_inproj_pad_0, pad_type = block_12_ffn_inproj_pad_type_0, strides = block_12_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_57, x = block_12_ffn_rmsnorm)[name = string("block_12_ffn_inproj")]; + tensor block_12_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(435456448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438725120))))[name = string("block_12_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_58 = constexpr_blockwise_shift_scale(data = block_12_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438880832))))[name = string("constexpr_blockwise_shift_scale_58")]; + tensor block_12_ffn_g_strides_0 = const()[name = string("block_12_ffn_g_strides_0"), val = tensor([1])]; + string block_12_ffn_g_pad_type_0 = const()[name = string("block_12_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_12_ffn_g_pad_0 = const()[name = string("block_12_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_12_ffn_g_dilations_0 = const()[name = string("block_12_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_12_ffn_g_groups_0 = const()[name = string("block_12_ffn_g_groups_0"), val = int32(1)]; + tensor block_12_ffn_g = conv(dilations = block_12_ffn_g_dilations_0, groups = block_12_ffn_g_groups_0, pad = block_12_ffn_g_pad_0, pad_type = block_12_ffn_g_pad_type_0, strides = block_12_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_58, x = block_12_ffn_rmsnorm)[name = string("block_12_ffn_g")]; + tensor block_12_ffn_g_activation = silu(x = block_12_ffn_g)[name = string("block_12_ffn_g_activation")]; + tensor block_12_ffn_x_gated = mul(x = block_12_ffn_inproj, y = block_12_ffn_g_activation)[name = string("block_12_ffn_x_gated")]; + tensor block_12_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438890624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442159296))))[name = string("block_12_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_59 = constexpr_blockwise_shift_scale(data = block_12_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442188032))))[name = string("constexpr_blockwise_shift_scale_59")]; + tensor block_12_ffn_outproj_strides_0 = const()[name = string("block_12_ffn_outproj_strides_0"), val = tensor([1])]; + string block_12_ffn_outproj_pad_type_0 = const()[name = string("block_12_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_12_ffn_outproj_pad_0 = const()[name = string("block_12_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_12_ffn_outproj_dilations_0 = const()[name = string("block_12_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_12_ffn_outproj_groups_0 = const()[name = string("block_12_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_12_ffn_outproj = conv(dilations = block_12_ffn_outproj_dilations_0, groups = block_12_ffn_outproj_groups_0, pad = block_12_ffn_outproj_pad_0, pad_type = block_12_ffn_outproj_pad_type_0, strides = block_12_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_59, x = block_12_ffn_x_gated)[name = string("block_12_ffn_outproj")]; + tensor block_12_residual_2 = add(x = block_12_ffn_outproj, y = block_12_residual_1)[name = string("block_12_residual_2")]; + tensor block_13_attention_rmsnorm_abs = abs(x = block_12_residual_2)[name = string("block_13_attention_rmsnorm_abs")]; + tensor block_13_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_13_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_13_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_13_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_13_attention_rmsnorm_maxval = reduce_max(axes = block_13_attention_rmsnorm_maxval_axes_0, keep_dims = block_13_attention_rmsnorm_maxval_keep_dims_0, x = block_13_attention_rmsnorm_abs)[name = string("block_13_attention_rmsnorm_maxval")]; + fp16 block_13_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_13_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_13_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_13_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_13_attention_rmsnorm_maxval_clipped = clip(alpha = block_13_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_13_attention_rmsnorm_maxval_clipped_beta_0, x = block_13_attention_rmsnorm_maxval)[name = string("block_13_attention_rmsnorm_maxval_clipped")]; + tensor block_13_attention_rmsnorm_scaled = real_div(x = block_12_residual_2, y = block_13_attention_rmsnorm_maxval_clipped)[name = string("block_13_attention_rmsnorm_scaled")]; + tensor block_13_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_13_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_13_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_13_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_13_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_13_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_13_attention_rmsnorm_squared_sum_keep_dims_0, x = block_13_attention_rmsnorm_scaled)[name = string("block_13_attention_rmsnorm_squared_sum")]; + fp16 block_13_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_13_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_13_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_13_attention_rmsnorm_rsqrt_epsilon_0, x = block_13_attention_rmsnorm_squared_sum)[name = string("block_13_attention_rmsnorm_rsqrt")]; + fp16 block_13_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_13_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_13_attention_rmsnorm_dim_scaled = mul(x = block_13_attention_rmsnorm_scaled, y = block_13_attention_rmsnorm_dim_scaled_y_0)[name = string("block_13_attention_rmsnorm_dim_scaled")]; + tensor block_13_attention_rmsnorm_normalized = mul(x = block_13_attention_rmsnorm_dim_scaled, y = block_13_attention_rmsnorm_rsqrt)[name = string("block_13_attention_rmsnorm_normalized")]; + tensor block_13_attention_rmsnorm_y_0 = const()[name = string("block_13_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442189888)))]; + tensor block_13_attention_rmsnorm = mul(x = block_13_attention_rmsnorm_normalized, y = block_13_attention_rmsnorm_y_0)[name = string("block_13_attention_rmsnorm")]; + tensor attention_13_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442191744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442965952))))[name = string("attention_13_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_60 = constexpr_blockwise_shift_scale(data = attention_13_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443002880))))[name = string("constexpr_blockwise_shift_scale_60")]; + tensor attention_13_qkvproj_bias_0 = const()[name = string("attention_13_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443005248)))]; + tensor attention_13_qkvproj_strides_0 = const()[name = string("attention_13_qkvproj_strides_0"), val = tensor([1])]; + string attention_13_qkvproj_pad_type_0 = const()[name = string("attention_13_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_13_qkvproj_pad_0 = const()[name = string("attention_13_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_13_qkvproj_dilations_0 = const()[name = string("attention_13_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_13_qkvproj_groups_0 = const()[name = string("attention_13_qkvproj_groups_0"), val = int32(1)]; + tensor attention_13_qkvproj = conv(bias = attention_13_qkvproj_bias_0, dilations = attention_13_qkvproj_dilations_0, groups = attention_13_qkvproj_groups_0, pad = attention_13_qkvproj_pad_0, pad_type = attention_13_qkvproj_pad_type_0, strides = attention_13_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_60, x = block_13_attention_rmsnorm)[name = string("attention_13_qkvproj")]; + tensor attention_13_head_reshape_shape_0 = const()[name = string("attention_13_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_13_head_reshape = reshape(shape = attention_13_head_reshape_shape_0, x = attention_13_qkvproj)[name = string("attention_13_head_reshape")]; + tensor attention_13_head_transpose_perm_0 = const()[name = string("attention_13_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_13_split_qkv_heads_axis_0 = const()[name = string("attention_13_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_13_split_qkv_heads_split_sizes_0 = const()[name = string("attention_13_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_13_head_transpose = transpose(perm = attention_13_head_transpose_perm_0, x = attention_13_head_reshape)[name = string("transpose_22")]; + tensor attention_13_split_qkv_heads_0, tensor attention_13_split_qkv_heads_1, tensor attention_13_split_qkv_heads_2 = split(axis = attention_13_split_qkv_heads_axis_0, split_sizes = attention_13_split_qkv_heads_split_sizes_0, x = attention_13_head_transpose)[name = string("attention_13_split_qkv_heads")]; + tensor attention_13_q_rope_lhs_mult = mul(x = attention_13_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_13_q_rope_lhs_mult")]; + int32 attention_13_q_rotate_half_split_num_splits_0 = const()[name = string("attention_13_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_13_q_rotate_half_split_axis_0 = const()[name = string("attention_13_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_13_q_rotate_half_split_0, tensor attention_13_q_rotate_half_split_1 = split(axis = attention_13_q_rotate_half_split_axis_0, num_splits = attention_13_q_rotate_half_split_num_splits_0, x = attention_13_split_qkv_heads_0)[name = string("attention_13_q_rotate_half_split")]; + fp16 attention_13_q_rotate_half_neg_y_0 = const()[name = string("attention_13_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_13_q_rotate_half_neg = mul(x = attention_13_q_rotate_half_split_1, y = attention_13_q_rotate_half_neg_y_0)[name = string("attention_13_q_rotate_half_neg")]; + int32 attention_13_q_rotate_half_concat_axis_0 = const()[name = string("attention_13_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_13_q_rotate_half_concat_interleave_0 = const()[name = string("attention_13_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_13_q_rotate_half_concat = concat(axis = attention_13_q_rotate_half_concat_axis_0, interleave = attention_13_q_rotate_half_concat_interleave_0, values = (attention_13_q_rotate_half_neg, attention_13_q_rotate_half_split_0))[name = string("attention_13_q_rotate_half_concat")]; + tensor attention_13_q_rope_rhs_mult = mul(x = attention_13_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_13_q_rope_rhs_mult")]; + tensor attention_13_q_rope = add(x = attention_13_q_rope_lhs_mult, y = attention_13_q_rope_rhs_mult)[name = string("attention_13_q_rope")]; + tensor attention_13_k_rope_lhs_mult = mul(x = attention_13_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_13_k_rope_lhs_mult")]; + int32 attention_13_k_rotate_half_split_num_splits_0 = const()[name = string("attention_13_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_13_k_rotate_half_split_axis_0 = const()[name = string("attention_13_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_13_k_rotate_half_split_0, tensor attention_13_k_rotate_half_split_1 = split(axis = attention_13_k_rotate_half_split_axis_0, num_splits = attention_13_k_rotate_half_split_num_splits_0, x = attention_13_split_qkv_heads_1)[name = string("attention_13_k_rotate_half_split")]; + fp16 attention_13_k_rotate_half_neg_y_0 = const()[name = string("attention_13_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_13_k_rotate_half_neg = mul(x = attention_13_k_rotate_half_split_1, y = attention_13_k_rotate_half_neg_y_0)[name = string("attention_13_k_rotate_half_neg")]; + int32 attention_13_k_rotate_half_concat_axis_0 = const()[name = string("attention_13_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_13_k_rotate_half_concat_interleave_0 = const()[name = string("attention_13_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_13_k_rotate_half_concat = concat(axis = attention_13_k_rotate_half_concat_axis_0, interleave = attention_13_k_rotate_half_concat_interleave_0, values = (attention_13_k_rotate_half_neg, attention_13_k_rotate_half_split_0))[name = string("attention_13_k_rotate_half_concat")]; + tensor attention_13_k_rope_rhs_mult = mul(x = attention_13_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_13_k_rope_rhs_mult")]; + tensor attention_13_k_rope = add(x = attention_13_k_rope_lhs_mult, y = attention_13_k_rope_rhs_mult)[name = string("attention_13_k_rope")]; + int32 attention_13_q_splits_axis_0 = const()[name = string("attention_13_q_splits_axis_0"), val = int32(1)]; + int32 attention_13_q_splits_num_splits_0 = const()[name = string("attention_13_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_13_q_splits_0, tensor attention_13_q_splits_1 = split(axis = attention_13_q_splits_axis_0, num_splits = attention_13_q_splits_num_splits_0, x = attention_13_q_rope)[name = string("attention_13_q_splits")]; + tensor attention_13_update_begin_0_values0_0 = const()[name = string("attention_13_update_begin_0_values0_0"), val = tensor([13])]; + tensor attention_13_update_begin_0_values1_0 = const()[name = string("attention_13_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_13_update_begin_0_values3_0 = const()[name = string("attention_13_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_13_update_begin_0_axis_0 = const()[name = string("attention_13_update_begin_0_axis_0"), val = int32(0)]; + bool attention_13_update_begin_0_interleave_0 = const()[name = string("attention_13_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_13_update_begin_0 = concat(axis = attention_13_update_begin_0_axis_0, interleave = attention_13_update_begin_0_interleave_0, values = (attention_13_update_begin_0_values0_0, attention_13_update_begin_0_values1_0, query_pos1, attention_13_update_begin_0_values3_0))[name = string("attention_13_update_begin_0")]; + tensor attention_13_update_end_0_values0_0 = const()[name = string("attention_13_update_end_0_values0_0"), val = tensor([14])]; + tensor attention_13_update_end_0_values1_0 = const()[name = string("attention_13_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_13_update_end_0_values3_0 = const()[name = string("attention_13_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_13_update_end_0_axis_0 = const()[name = string("attention_13_update_end_0_axis_0"), val = int32(0)]; + bool attention_13_update_end_0_interleave_0 = const()[name = string("attention_13_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_13_update_end_0 = concat(axis = attention_13_update_end_0_axis_0, interleave = attention_13_update_end_0_interleave_0, values = (attention_13_update_end_0_values0_0, attention_13_update_end_0_values1_0, end_pos_0, attention_13_update_end_0_values3_0))[name = string("attention_13_update_end_0")]; + tensor attention_13_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_13_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_13_updated_key_cache_0 = slice_update(begin = attention_13_update_begin_0, end = attention_13_update_end_0, squeeze_mask = attention_13_updated_key_cache_0_squeeze_mask_0, update = attention_13_k_rope, x = coreml_update_state_24)[name = string("attention_13_updated_key_cache_0")]; + write_state(data = attention_13_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_26_write_state")]; + tensor coreml_update_state_26 = read_state(input = key_cache_state)[name = string("coreml_update_state_26")]; + tensor attention_13_key_cache_begin_0 = const()[name = string("attention_13_key_cache_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor attention_13_key_cache_end_0 = const()[name = string("attention_13_key_cache_end_0"), val = tensor([14, 2, 512, 64])]; + tensor attention_13_key_cache_squeeze_mask_0 = const()[name = string("attention_13_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_13_key_cache = slice_by_index(begin = attention_13_key_cache_begin_0, end = attention_13_key_cache_end_0, squeeze_mask = attention_13_key_cache_squeeze_mask_0, x = coreml_update_state_26)[name = string("attention_13_key_cache")]; + int32 attention_13_key_cache_head_axis_0 = const()[name = string("attention_13_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_13_key_cache_head_num_splits_0 = const()[name = string("attention_13_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_13_key_cache_head_0, tensor attention_13_key_cache_head_1 = split(axis = attention_13_key_cache_head_axis_0, num_splits = attention_13_key_cache_head_num_splits_0, x = attention_13_key_cache)[name = string("attention_13_key_cache_head")]; + tensor attention_13_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_13_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_13_updated_value_cache_0 = slice_update(begin = attention_13_update_begin_0, end = attention_13_update_end_0, squeeze_mask = attention_13_updated_value_cache_0_squeeze_mask_0, update = attention_13_split_qkv_heads_2, x = coreml_update_state_25)[name = string("attention_13_updated_value_cache_0")]; + write_state(data = attention_13_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_27_write_state")]; + tensor coreml_update_state_27 = read_state(input = value_cache_state)[name = string("coreml_update_state_27")]; + tensor attention_13_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_13_slice_current_layer_value_cache_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor attention_13_slice_current_layer_value_cache_end_0 = const()[name = string("attention_13_slice_current_layer_value_cache_end_0"), val = tensor([14, 2, 512, 64])]; + tensor attention_13_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_13_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_13_slice_current_layer_value_cache = slice_by_index(begin = attention_13_slice_current_layer_value_cache_begin_0, end = attention_13_slice_current_layer_value_cache_end_0, squeeze_mask = attention_13_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_27)[name = string("attention_13_slice_current_layer_value_cache")]; + int32 attention_13_slice_value_cache_heads_axis_0 = const()[name = string("attention_13_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_13_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_13_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_13_slice_value_cache_heads_0, tensor attention_13_slice_value_cache_heads_1 = split(axis = attention_13_slice_value_cache_heads_axis_0, num_splits = attention_13_slice_value_cache_heads_num_splits_0, x = attention_13_slice_current_layer_value_cache)[name = string("attention_13_slice_value_cache_heads")]; + bool attention_13_scores_0_transpose_y_0 = const()[name = string("attention_13_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_13_scores_0_transpose_x_0 = const()[name = string("attention_13_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_13_scores_0 = matmul(transpose_x = attention_13_scores_0_transpose_x_0, transpose_y = attention_13_scores_0_transpose_y_0, x = attention_13_key_cache_head_0, y = attention_13_q_splits_0)[name = string("attention_13_scores_0")]; + fp16 attention_13_scaled_scores_0_y_0 = const()[name = string("attention_13_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_13_scaled_scores_0 = mul(x = attention_13_scores_0, y = attention_13_scaled_scores_0_y_0)[name = string("attention_13_scaled_scores_0")]; + tensor attention_13_masked_scaled_scores_0 = add(x = attention_13_scaled_scores_0, y = transpose_0)[name = string("attention_13_masked_scaled_scores_0")]; + int32 softmax_26_axis_0 = const()[name = string("softmax_26_axis_0"), val = int32(-2)]; + tensor softmax_26 = softmax(axis = softmax_26_axis_0, x = attention_13_masked_scaled_scores_0)[name = string("softmax_26")]; + bool attention_13_attention_0_transpose_x_0 = const()[name = string("attention_13_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_13_attention_0_transpose_y_0 = const()[name = string("attention_13_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_13_attention_0 = matmul(transpose_x = attention_13_attention_0_transpose_x_0, transpose_y = attention_13_attention_0_transpose_y_0, x = softmax_26, y = attention_13_slice_value_cache_heads_0)[name = string("attention_13_attention_0")]; + bool attention_13_scores_1_transpose_y_0 = const()[name = string("attention_13_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_13_scores_1_transpose_x_0 = const()[name = string("attention_13_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_13_scores_1 = matmul(transpose_x = attention_13_scores_1_transpose_x_0, transpose_y = attention_13_scores_1_transpose_y_0, x = attention_13_key_cache_head_1, y = attention_13_q_splits_1)[name = string("attention_13_scores_1")]; + fp16 attention_13_scaled_scores_1_y_0 = const()[name = string("attention_13_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_13_scaled_scores_1 = mul(x = attention_13_scores_1, y = attention_13_scaled_scores_1_y_0)[name = string("attention_13_scaled_scores_1")]; + tensor attention_13_masked_scaled_scores_1 = add(x = attention_13_scaled_scores_1, y = transpose_0)[name = string("attention_13_masked_scaled_scores_1")]; + int32 softmax_27_axis_0 = const()[name = string("softmax_27_axis_0"), val = int32(-2)]; + tensor softmax_27 = softmax(axis = softmax_27_axis_0, x = attention_13_masked_scaled_scores_1)[name = string("softmax_27")]; + bool attention_13_attention_1_transpose_x_0 = const()[name = string("attention_13_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_13_attention_1_transpose_y_0 = const()[name = string("attention_13_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_13_attention_1 = matmul(transpose_x = attention_13_attention_1_transpose_x_0, transpose_y = attention_13_attention_1_transpose_y_0, x = softmax_27, y = attention_13_slice_value_cache_heads_1)[name = string("attention_13_attention_1")]; + int32 attention_13_concat_attention_all_heads_axis_0 = const()[name = string("attention_13_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_13_concat_attention_all_heads_interleave_0 = const()[name = string("attention_13_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_13_concat_attention_all_heads = concat(axis = attention_13_concat_attention_all_heads_axis_0, interleave = attention_13_concat_attention_all_heads_interleave_0, values = (attention_13_attention_0, attention_13_attention_1))[name = string("attention_13_concat_attention_all_heads")]; + tensor attention_13_channels_first_retransposed_perm_0 = const()[name = string("attention_13_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_13_reshaped_shape_0 = const()[name = string("attention_13_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_13_channels_first_retransposed = transpose(perm = attention_13_channels_first_retransposed_perm_0, x = attention_13_concat_attention_all_heads)[name = string("transpose_21")]; + tensor attention_13_reshaped = reshape(shape = attention_13_reshaped_shape_0, x = attention_13_channels_first_retransposed)[name = string("attention_13_reshaped")]; + tensor attention_13_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443007616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443609792))))[name = string("attention_13_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_61 = constexpr_blockwise_shift_scale(data = attention_13_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443638528))))[name = string("constexpr_blockwise_shift_scale_61")]; + tensor attention_13_outproj_strides_0 = const()[name = string("attention_13_outproj_strides_0"), val = tensor([1])]; + string attention_13_outproj_pad_type_0 = const()[name = string("attention_13_outproj_pad_type_0"), val = string("valid")]; + tensor attention_13_outproj_pad_0 = const()[name = string("attention_13_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_13_outproj_dilations_0 = const()[name = string("attention_13_outproj_dilations_0"), val = tensor([1])]; + int32 attention_13_outproj_groups_0 = const()[name = string("attention_13_outproj_groups_0"), val = int32(1)]; + tensor attention_13_outproj = conv(dilations = attention_13_outproj_dilations_0, groups = attention_13_outproj_groups_0, pad = attention_13_outproj_pad_0, pad_type = attention_13_outproj_pad_type_0, strides = attention_13_outproj_strides_0, weight = constexpr_blockwise_shift_scale_61, x = attention_13_reshaped)[name = string("attention_13_outproj")]; + tensor block_13_residual_1 = add(x = block_12_residual_2, y = attention_13_outproj)[name = string("block_13_residual_1")]; + tensor block_13_ffn_rmsnorm_abs = abs(x = block_13_residual_1)[name = string("block_13_ffn_rmsnorm_abs")]; + tensor block_13_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_13_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_13_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_13_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_13_ffn_rmsnorm_maxval = reduce_max(axes = block_13_ffn_rmsnorm_maxval_axes_0, keep_dims = block_13_ffn_rmsnorm_maxval_keep_dims_0, x = block_13_ffn_rmsnorm_abs)[name = string("block_13_ffn_rmsnorm_maxval")]; + fp16 block_13_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_13_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_13_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_13_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_13_ffn_rmsnorm_maxval_clipped = clip(alpha = block_13_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_13_ffn_rmsnorm_maxval_clipped_beta_0, x = block_13_ffn_rmsnorm_maxval)[name = string("block_13_ffn_rmsnorm_maxval_clipped")]; + tensor block_13_ffn_rmsnorm_scaled = real_div(x = block_13_residual_1, y = block_13_ffn_rmsnorm_maxval_clipped)[name = string("block_13_ffn_rmsnorm_scaled")]; + tensor block_13_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_13_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_13_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_13_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_13_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_13_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_13_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_13_ffn_rmsnorm_scaled)[name = string("block_13_ffn_rmsnorm_squared_sum")]; + fp16 block_13_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_13_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_13_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_13_ffn_rmsnorm_rsqrt_epsilon_0, x = block_13_ffn_rmsnorm_squared_sum)[name = string("block_13_ffn_rmsnorm_rsqrt")]; + fp16 block_13_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_13_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_13_ffn_rmsnorm_dim_scaled = mul(x = block_13_ffn_rmsnorm_scaled, y = block_13_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_13_ffn_rmsnorm_dim_scaled")]; + tensor block_13_ffn_rmsnorm_normalized = mul(x = block_13_ffn_rmsnorm_dim_scaled, y = block_13_ffn_rmsnorm_rsqrt)[name = string("block_13_ffn_rmsnorm_normalized")]; + tensor block_13_ffn_rmsnorm_y_0 = const()[name = string("block_13_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443640384)))]; + tensor block_13_ffn_rmsnorm = mul(x = block_13_ffn_rmsnorm_normalized, y = block_13_ffn_rmsnorm_y_0)[name = string("block_13_ffn_rmsnorm")]; + tensor block_13_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443642240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446910912))))[name = string("block_13_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_62 = constexpr_blockwise_shift_scale(data = block_13_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447066624))))[name = string("constexpr_blockwise_shift_scale_62")]; + tensor block_13_ffn_inproj_strides_0 = const()[name = string("block_13_ffn_inproj_strides_0"), val = tensor([1])]; + string block_13_ffn_inproj_pad_type_0 = const()[name = string("block_13_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_13_ffn_inproj_pad_0 = const()[name = string("block_13_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_13_ffn_inproj_dilations_0 = const()[name = string("block_13_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_13_ffn_inproj_groups_0 = const()[name = string("block_13_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_13_ffn_inproj = conv(dilations = block_13_ffn_inproj_dilations_0, groups = block_13_ffn_inproj_groups_0, pad = block_13_ffn_inproj_pad_0, pad_type = block_13_ffn_inproj_pad_type_0, strides = block_13_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_62, x = block_13_ffn_rmsnorm)[name = string("block_13_ffn_inproj")]; + tensor block_13_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447076416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(450345088))))[name = string("block_13_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_63 = constexpr_blockwise_shift_scale(data = block_13_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(450500800))))[name = string("constexpr_blockwise_shift_scale_63")]; + tensor block_13_ffn_g_strides_0 = const()[name = string("block_13_ffn_g_strides_0"), val = tensor([1])]; + string block_13_ffn_g_pad_type_0 = const()[name = string("block_13_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_13_ffn_g_pad_0 = const()[name = string("block_13_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_13_ffn_g_dilations_0 = const()[name = string("block_13_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_13_ffn_g_groups_0 = const()[name = string("block_13_ffn_g_groups_0"), val = int32(1)]; + tensor block_13_ffn_g = conv(dilations = block_13_ffn_g_dilations_0, groups = block_13_ffn_g_groups_0, pad = block_13_ffn_g_pad_0, pad_type = block_13_ffn_g_pad_type_0, strides = block_13_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_63, x = block_13_ffn_rmsnorm)[name = string("block_13_ffn_g")]; + tensor block_13_ffn_g_activation = silu(x = block_13_ffn_g)[name = string("block_13_ffn_g_activation")]; + tensor block_13_ffn_x_gated = mul(x = block_13_ffn_inproj, y = block_13_ffn_g_activation)[name = string("block_13_ffn_x_gated")]; + tensor block_13_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(450510592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453779264))))[name = string("block_13_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_64 = constexpr_blockwise_shift_scale(data = block_13_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453808000))))[name = string("constexpr_blockwise_shift_scale_64")]; + tensor block_13_ffn_outproj_strides_0 = const()[name = string("block_13_ffn_outproj_strides_0"), val = tensor([1])]; + string block_13_ffn_outproj_pad_type_0 = const()[name = string("block_13_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_13_ffn_outproj_pad_0 = const()[name = string("block_13_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_13_ffn_outproj_dilations_0 = const()[name = string("block_13_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_13_ffn_outproj_groups_0 = const()[name = string("block_13_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_13_ffn_outproj = conv(dilations = block_13_ffn_outproj_dilations_0, groups = block_13_ffn_outproj_groups_0, pad = block_13_ffn_outproj_pad_0, pad_type = block_13_ffn_outproj_pad_type_0, strides = block_13_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_64, x = block_13_ffn_x_gated)[name = string("block_13_ffn_outproj")]; + tensor block_13_residual_2 = add(x = block_13_ffn_outproj, y = block_13_residual_1)[name = string("block_13_residual_2")]; + tensor block_14_attention_rmsnorm_abs = abs(x = block_13_residual_2)[name = string("block_14_attention_rmsnorm_abs")]; + tensor block_14_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_14_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_14_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_14_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_14_attention_rmsnorm_maxval = reduce_max(axes = block_14_attention_rmsnorm_maxval_axes_0, keep_dims = block_14_attention_rmsnorm_maxval_keep_dims_0, x = block_14_attention_rmsnorm_abs)[name = string("block_14_attention_rmsnorm_maxval")]; + fp16 block_14_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_14_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_14_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_14_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_14_attention_rmsnorm_maxval_clipped = clip(alpha = block_14_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_14_attention_rmsnorm_maxval_clipped_beta_0, x = block_14_attention_rmsnorm_maxval)[name = string("block_14_attention_rmsnorm_maxval_clipped")]; + tensor block_14_attention_rmsnorm_scaled = real_div(x = block_13_residual_2, y = block_14_attention_rmsnorm_maxval_clipped)[name = string("block_14_attention_rmsnorm_scaled")]; + tensor block_14_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_14_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_14_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_14_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_14_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_14_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_14_attention_rmsnorm_squared_sum_keep_dims_0, x = block_14_attention_rmsnorm_scaled)[name = string("block_14_attention_rmsnorm_squared_sum")]; + fp16 block_14_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_14_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_14_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_14_attention_rmsnorm_rsqrt_epsilon_0, x = block_14_attention_rmsnorm_squared_sum)[name = string("block_14_attention_rmsnorm_rsqrt")]; + fp16 block_14_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_14_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_14_attention_rmsnorm_dim_scaled = mul(x = block_14_attention_rmsnorm_scaled, y = block_14_attention_rmsnorm_dim_scaled_y_0)[name = string("block_14_attention_rmsnorm_dim_scaled")]; + tensor block_14_attention_rmsnorm_normalized = mul(x = block_14_attention_rmsnorm_dim_scaled, y = block_14_attention_rmsnorm_rsqrt)[name = string("block_14_attention_rmsnorm_normalized")]; + tensor block_14_attention_rmsnorm_y_0 = const()[name = string("block_14_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453809856)))]; + tensor block_14_attention_rmsnorm = mul(x = block_14_attention_rmsnorm_normalized, y = block_14_attention_rmsnorm_y_0)[name = string("block_14_attention_rmsnorm")]; + tensor attention_14_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453811712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454585920))))[name = string("attention_14_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_65 = constexpr_blockwise_shift_scale(data = attention_14_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454622848))))[name = string("constexpr_blockwise_shift_scale_65")]; + tensor attention_14_qkvproj_bias_0 = const()[name = string("attention_14_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454625216)))]; + tensor attention_14_qkvproj_strides_0 = const()[name = string("attention_14_qkvproj_strides_0"), val = tensor([1])]; + string attention_14_qkvproj_pad_type_0 = const()[name = string("attention_14_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_14_qkvproj_pad_0 = const()[name = string("attention_14_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_14_qkvproj_dilations_0 = const()[name = string("attention_14_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_14_qkvproj_groups_0 = const()[name = string("attention_14_qkvproj_groups_0"), val = int32(1)]; + tensor attention_14_qkvproj = conv(bias = attention_14_qkvproj_bias_0, dilations = attention_14_qkvproj_dilations_0, groups = attention_14_qkvproj_groups_0, pad = attention_14_qkvproj_pad_0, pad_type = attention_14_qkvproj_pad_type_0, strides = attention_14_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_65, x = block_14_attention_rmsnorm)[name = string("attention_14_qkvproj")]; + tensor attention_14_head_reshape_shape_0 = const()[name = string("attention_14_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_14_head_reshape = reshape(shape = attention_14_head_reshape_shape_0, x = attention_14_qkvproj)[name = string("attention_14_head_reshape")]; + tensor attention_14_head_transpose_perm_0 = const()[name = string("attention_14_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_14_split_qkv_heads_axis_0 = const()[name = string("attention_14_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_14_split_qkv_heads_split_sizes_0 = const()[name = string("attention_14_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_14_head_transpose = transpose(perm = attention_14_head_transpose_perm_0, x = attention_14_head_reshape)[name = string("transpose_20")]; + tensor attention_14_split_qkv_heads_0, tensor attention_14_split_qkv_heads_1, tensor attention_14_split_qkv_heads_2 = split(axis = attention_14_split_qkv_heads_axis_0, split_sizes = attention_14_split_qkv_heads_split_sizes_0, x = attention_14_head_transpose)[name = string("attention_14_split_qkv_heads")]; + tensor attention_14_q_rope_lhs_mult = mul(x = attention_14_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_14_q_rope_lhs_mult")]; + int32 attention_14_q_rotate_half_split_num_splits_0 = const()[name = string("attention_14_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_14_q_rotate_half_split_axis_0 = const()[name = string("attention_14_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_14_q_rotate_half_split_0, tensor attention_14_q_rotate_half_split_1 = split(axis = attention_14_q_rotate_half_split_axis_0, num_splits = attention_14_q_rotate_half_split_num_splits_0, x = attention_14_split_qkv_heads_0)[name = string("attention_14_q_rotate_half_split")]; + fp16 attention_14_q_rotate_half_neg_y_0 = const()[name = string("attention_14_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_14_q_rotate_half_neg = mul(x = attention_14_q_rotate_half_split_1, y = attention_14_q_rotate_half_neg_y_0)[name = string("attention_14_q_rotate_half_neg")]; + int32 attention_14_q_rotate_half_concat_axis_0 = const()[name = string("attention_14_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_14_q_rotate_half_concat_interleave_0 = const()[name = string("attention_14_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_14_q_rotate_half_concat = concat(axis = attention_14_q_rotate_half_concat_axis_0, interleave = attention_14_q_rotate_half_concat_interleave_0, values = (attention_14_q_rotate_half_neg, attention_14_q_rotate_half_split_0))[name = string("attention_14_q_rotate_half_concat")]; + tensor attention_14_q_rope_rhs_mult = mul(x = attention_14_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_14_q_rope_rhs_mult")]; + tensor attention_14_q_rope = add(x = attention_14_q_rope_lhs_mult, y = attention_14_q_rope_rhs_mult)[name = string("attention_14_q_rope")]; + tensor attention_14_k_rope_lhs_mult = mul(x = attention_14_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_14_k_rope_lhs_mult")]; + int32 attention_14_k_rotate_half_split_num_splits_0 = const()[name = string("attention_14_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_14_k_rotate_half_split_axis_0 = const()[name = string("attention_14_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_14_k_rotate_half_split_0, tensor attention_14_k_rotate_half_split_1 = split(axis = attention_14_k_rotate_half_split_axis_0, num_splits = attention_14_k_rotate_half_split_num_splits_0, x = attention_14_split_qkv_heads_1)[name = string("attention_14_k_rotate_half_split")]; + fp16 attention_14_k_rotate_half_neg_y_0 = const()[name = string("attention_14_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_14_k_rotate_half_neg = mul(x = attention_14_k_rotate_half_split_1, y = attention_14_k_rotate_half_neg_y_0)[name = string("attention_14_k_rotate_half_neg")]; + int32 attention_14_k_rotate_half_concat_axis_0 = const()[name = string("attention_14_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_14_k_rotate_half_concat_interleave_0 = const()[name = string("attention_14_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_14_k_rotate_half_concat = concat(axis = attention_14_k_rotate_half_concat_axis_0, interleave = attention_14_k_rotate_half_concat_interleave_0, values = (attention_14_k_rotate_half_neg, attention_14_k_rotate_half_split_0))[name = string("attention_14_k_rotate_half_concat")]; + tensor attention_14_k_rope_rhs_mult = mul(x = attention_14_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_14_k_rope_rhs_mult")]; + tensor attention_14_k_rope = add(x = attention_14_k_rope_lhs_mult, y = attention_14_k_rope_rhs_mult)[name = string("attention_14_k_rope")]; + int32 attention_14_q_splits_axis_0 = const()[name = string("attention_14_q_splits_axis_0"), val = int32(1)]; + int32 attention_14_q_splits_num_splits_0 = const()[name = string("attention_14_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_14_q_splits_0, tensor attention_14_q_splits_1 = split(axis = attention_14_q_splits_axis_0, num_splits = attention_14_q_splits_num_splits_0, x = attention_14_q_rope)[name = string("attention_14_q_splits")]; + tensor attention_14_update_begin_0_values0_0 = const()[name = string("attention_14_update_begin_0_values0_0"), val = tensor([14])]; + tensor attention_14_update_begin_0_values1_0 = const()[name = string("attention_14_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_14_update_begin_0_values3_0 = const()[name = string("attention_14_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_14_update_begin_0_axis_0 = const()[name = string("attention_14_update_begin_0_axis_0"), val = int32(0)]; + bool attention_14_update_begin_0_interleave_0 = const()[name = string("attention_14_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_14_update_begin_0 = concat(axis = attention_14_update_begin_0_axis_0, interleave = attention_14_update_begin_0_interleave_0, values = (attention_14_update_begin_0_values0_0, attention_14_update_begin_0_values1_0, query_pos1, attention_14_update_begin_0_values3_0))[name = string("attention_14_update_begin_0")]; + tensor attention_14_update_end_0_values0_0 = const()[name = string("attention_14_update_end_0_values0_0"), val = tensor([15])]; + tensor attention_14_update_end_0_values1_0 = const()[name = string("attention_14_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_14_update_end_0_values3_0 = const()[name = string("attention_14_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_14_update_end_0_axis_0 = const()[name = string("attention_14_update_end_0_axis_0"), val = int32(0)]; + bool attention_14_update_end_0_interleave_0 = const()[name = string("attention_14_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_14_update_end_0 = concat(axis = attention_14_update_end_0_axis_0, interleave = attention_14_update_end_0_interleave_0, values = (attention_14_update_end_0_values0_0, attention_14_update_end_0_values1_0, end_pos_0, attention_14_update_end_0_values3_0))[name = string("attention_14_update_end_0")]; + tensor attention_14_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_14_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_14_updated_key_cache_0 = slice_update(begin = attention_14_update_begin_0, end = attention_14_update_end_0, squeeze_mask = attention_14_updated_key_cache_0_squeeze_mask_0, update = attention_14_k_rope, x = coreml_update_state_26)[name = string("attention_14_updated_key_cache_0")]; + write_state(data = attention_14_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_28_write_state")]; + tensor coreml_update_state_28 = read_state(input = key_cache_state)[name = string("coreml_update_state_28")]; + tensor attention_14_key_cache_begin_0 = const()[name = string("attention_14_key_cache_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor attention_14_key_cache_end_0 = const()[name = string("attention_14_key_cache_end_0"), val = tensor([15, 2, 512, 64])]; + tensor attention_14_key_cache_squeeze_mask_0 = const()[name = string("attention_14_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_14_key_cache = slice_by_index(begin = attention_14_key_cache_begin_0, end = attention_14_key_cache_end_0, squeeze_mask = attention_14_key_cache_squeeze_mask_0, x = coreml_update_state_28)[name = string("attention_14_key_cache")]; + int32 attention_14_key_cache_head_axis_0 = const()[name = string("attention_14_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_14_key_cache_head_num_splits_0 = const()[name = string("attention_14_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_14_key_cache_head_0, tensor attention_14_key_cache_head_1 = split(axis = attention_14_key_cache_head_axis_0, num_splits = attention_14_key_cache_head_num_splits_0, x = attention_14_key_cache)[name = string("attention_14_key_cache_head")]; + tensor attention_14_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_14_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_14_updated_value_cache_0 = slice_update(begin = attention_14_update_begin_0, end = attention_14_update_end_0, squeeze_mask = attention_14_updated_value_cache_0_squeeze_mask_0, update = attention_14_split_qkv_heads_2, x = coreml_update_state_27)[name = string("attention_14_updated_value_cache_0")]; + write_state(data = attention_14_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_29_write_state")]; + tensor coreml_update_state_29 = read_state(input = value_cache_state)[name = string("coreml_update_state_29")]; + tensor attention_14_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_14_slice_current_layer_value_cache_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor attention_14_slice_current_layer_value_cache_end_0 = const()[name = string("attention_14_slice_current_layer_value_cache_end_0"), val = tensor([15, 2, 512, 64])]; + tensor attention_14_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_14_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_14_slice_current_layer_value_cache = slice_by_index(begin = attention_14_slice_current_layer_value_cache_begin_0, end = attention_14_slice_current_layer_value_cache_end_0, squeeze_mask = attention_14_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_29)[name = string("attention_14_slice_current_layer_value_cache")]; + int32 attention_14_slice_value_cache_heads_axis_0 = const()[name = string("attention_14_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_14_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_14_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_14_slice_value_cache_heads_0, tensor attention_14_slice_value_cache_heads_1 = split(axis = attention_14_slice_value_cache_heads_axis_0, num_splits = attention_14_slice_value_cache_heads_num_splits_0, x = attention_14_slice_current_layer_value_cache)[name = string("attention_14_slice_value_cache_heads")]; + bool attention_14_scores_0_transpose_y_0 = const()[name = string("attention_14_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_14_scores_0_transpose_x_0 = const()[name = string("attention_14_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_14_scores_0 = matmul(transpose_x = attention_14_scores_0_transpose_x_0, transpose_y = attention_14_scores_0_transpose_y_0, x = attention_14_key_cache_head_0, y = attention_14_q_splits_0)[name = string("attention_14_scores_0")]; + fp16 attention_14_scaled_scores_0_y_0 = const()[name = string("attention_14_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_14_scaled_scores_0 = mul(x = attention_14_scores_0, y = attention_14_scaled_scores_0_y_0)[name = string("attention_14_scaled_scores_0")]; + tensor attention_14_masked_scaled_scores_0 = add(x = attention_14_scaled_scores_0, y = transpose_0)[name = string("attention_14_masked_scaled_scores_0")]; + int32 softmax_28_axis_0 = const()[name = string("softmax_28_axis_0"), val = int32(-2)]; + tensor softmax_28 = softmax(axis = softmax_28_axis_0, x = attention_14_masked_scaled_scores_0)[name = string("softmax_28")]; + bool attention_14_attention_0_transpose_x_0 = const()[name = string("attention_14_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_14_attention_0_transpose_y_0 = const()[name = string("attention_14_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_14_attention_0 = matmul(transpose_x = attention_14_attention_0_transpose_x_0, transpose_y = attention_14_attention_0_transpose_y_0, x = softmax_28, y = attention_14_slice_value_cache_heads_0)[name = string("attention_14_attention_0")]; + bool attention_14_scores_1_transpose_y_0 = const()[name = string("attention_14_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_14_scores_1_transpose_x_0 = const()[name = string("attention_14_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_14_scores_1 = matmul(transpose_x = attention_14_scores_1_transpose_x_0, transpose_y = attention_14_scores_1_transpose_y_0, x = attention_14_key_cache_head_1, y = attention_14_q_splits_1)[name = string("attention_14_scores_1")]; + fp16 attention_14_scaled_scores_1_y_0 = const()[name = string("attention_14_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_14_scaled_scores_1 = mul(x = attention_14_scores_1, y = attention_14_scaled_scores_1_y_0)[name = string("attention_14_scaled_scores_1")]; + tensor attention_14_masked_scaled_scores_1 = add(x = attention_14_scaled_scores_1, y = transpose_0)[name = string("attention_14_masked_scaled_scores_1")]; + int32 softmax_29_axis_0 = const()[name = string("softmax_29_axis_0"), val = int32(-2)]; + tensor softmax_29 = softmax(axis = softmax_29_axis_0, x = attention_14_masked_scaled_scores_1)[name = string("softmax_29")]; + bool attention_14_attention_1_transpose_x_0 = const()[name = string("attention_14_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_14_attention_1_transpose_y_0 = const()[name = string("attention_14_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_14_attention_1 = matmul(transpose_x = attention_14_attention_1_transpose_x_0, transpose_y = attention_14_attention_1_transpose_y_0, x = softmax_29, y = attention_14_slice_value_cache_heads_1)[name = string("attention_14_attention_1")]; + int32 attention_14_concat_attention_all_heads_axis_0 = const()[name = string("attention_14_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_14_concat_attention_all_heads_interleave_0 = const()[name = string("attention_14_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_14_concat_attention_all_heads = concat(axis = attention_14_concat_attention_all_heads_axis_0, interleave = attention_14_concat_attention_all_heads_interleave_0, values = (attention_14_attention_0, attention_14_attention_1))[name = string("attention_14_concat_attention_all_heads")]; + tensor attention_14_channels_first_retransposed_perm_0 = const()[name = string("attention_14_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_14_reshaped_shape_0 = const()[name = string("attention_14_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_14_channels_first_retransposed = transpose(perm = attention_14_channels_first_retransposed_perm_0, x = attention_14_concat_attention_all_heads)[name = string("transpose_19")]; + tensor attention_14_reshaped = reshape(shape = attention_14_reshaped_shape_0, x = attention_14_channels_first_retransposed)[name = string("attention_14_reshaped")]; + tensor attention_14_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454627584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455229760))))[name = string("attention_14_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_66 = constexpr_blockwise_shift_scale(data = attention_14_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455258496))))[name = string("constexpr_blockwise_shift_scale_66")]; + tensor attention_14_outproj_strides_0 = const()[name = string("attention_14_outproj_strides_0"), val = tensor([1])]; + string attention_14_outproj_pad_type_0 = const()[name = string("attention_14_outproj_pad_type_0"), val = string("valid")]; + tensor attention_14_outproj_pad_0 = const()[name = string("attention_14_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_14_outproj_dilations_0 = const()[name = string("attention_14_outproj_dilations_0"), val = tensor([1])]; + int32 attention_14_outproj_groups_0 = const()[name = string("attention_14_outproj_groups_0"), val = int32(1)]; + tensor attention_14_outproj = conv(dilations = attention_14_outproj_dilations_0, groups = attention_14_outproj_groups_0, pad = attention_14_outproj_pad_0, pad_type = attention_14_outproj_pad_type_0, strides = attention_14_outproj_strides_0, weight = constexpr_blockwise_shift_scale_66, x = attention_14_reshaped)[name = string("attention_14_outproj")]; + tensor block_14_residual_1 = add(x = block_13_residual_2, y = attention_14_outproj)[name = string("block_14_residual_1")]; + tensor block_14_ffn_rmsnorm_abs = abs(x = block_14_residual_1)[name = string("block_14_ffn_rmsnorm_abs")]; + tensor block_14_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_14_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_14_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_14_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_14_ffn_rmsnorm_maxval = reduce_max(axes = block_14_ffn_rmsnorm_maxval_axes_0, keep_dims = block_14_ffn_rmsnorm_maxval_keep_dims_0, x = block_14_ffn_rmsnorm_abs)[name = string("block_14_ffn_rmsnorm_maxval")]; + fp16 block_14_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_14_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_14_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_14_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_14_ffn_rmsnorm_maxval_clipped = clip(alpha = block_14_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_14_ffn_rmsnorm_maxval_clipped_beta_0, x = block_14_ffn_rmsnorm_maxval)[name = string("block_14_ffn_rmsnorm_maxval_clipped")]; + tensor block_14_ffn_rmsnorm_scaled = real_div(x = block_14_residual_1, y = block_14_ffn_rmsnorm_maxval_clipped)[name = string("block_14_ffn_rmsnorm_scaled")]; + tensor block_14_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_14_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_14_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_14_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_14_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_14_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_14_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_14_ffn_rmsnorm_scaled)[name = string("block_14_ffn_rmsnorm_squared_sum")]; + fp16 block_14_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_14_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_14_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_14_ffn_rmsnorm_rsqrt_epsilon_0, x = block_14_ffn_rmsnorm_squared_sum)[name = string("block_14_ffn_rmsnorm_rsqrt")]; + fp16 block_14_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_14_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_14_ffn_rmsnorm_dim_scaled = mul(x = block_14_ffn_rmsnorm_scaled, y = block_14_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_14_ffn_rmsnorm_dim_scaled")]; + tensor block_14_ffn_rmsnorm_normalized = mul(x = block_14_ffn_rmsnorm_dim_scaled, y = block_14_ffn_rmsnorm_rsqrt)[name = string("block_14_ffn_rmsnorm_normalized")]; + tensor block_14_ffn_rmsnorm_y_0 = const()[name = string("block_14_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455260352)))]; + tensor block_14_ffn_rmsnorm = mul(x = block_14_ffn_rmsnorm_normalized, y = block_14_ffn_rmsnorm_y_0)[name = string("block_14_ffn_rmsnorm")]; + tensor block_14_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455262208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458530880))))[name = string("block_14_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_67 = constexpr_blockwise_shift_scale(data = block_14_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458686592))))[name = string("constexpr_blockwise_shift_scale_67")]; + tensor block_14_ffn_inproj_strides_0 = const()[name = string("block_14_ffn_inproj_strides_0"), val = tensor([1])]; + string block_14_ffn_inproj_pad_type_0 = const()[name = string("block_14_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_14_ffn_inproj_pad_0 = const()[name = string("block_14_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_14_ffn_inproj_dilations_0 = const()[name = string("block_14_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_14_ffn_inproj_groups_0 = const()[name = string("block_14_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_14_ffn_inproj = conv(dilations = block_14_ffn_inproj_dilations_0, groups = block_14_ffn_inproj_groups_0, pad = block_14_ffn_inproj_pad_0, pad_type = block_14_ffn_inproj_pad_type_0, strides = block_14_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_67, x = block_14_ffn_rmsnorm)[name = string("block_14_ffn_inproj")]; + tensor block_14_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458696384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461965056))))[name = string("block_14_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_68 = constexpr_blockwise_shift_scale(data = block_14_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462120768))))[name = string("constexpr_blockwise_shift_scale_68")]; + tensor block_14_ffn_g_strides_0 = const()[name = string("block_14_ffn_g_strides_0"), val = tensor([1])]; + string block_14_ffn_g_pad_type_0 = const()[name = string("block_14_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_14_ffn_g_pad_0 = const()[name = string("block_14_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_14_ffn_g_dilations_0 = const()[name = string("block_14_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_14_ffn_g_groups_0 = const()[name = string("block_14_ffn_g_groups_0"), val = int32(1)]; + tensor block_14_ffn_g = conv(dilations = block_14_ffn_g_dilations_0, groups = block_14_ffn_g_groups_0, pad = block_14_ffn_g_pad_0, pad_type = block_14_ffn_g_pad_type_0, strides = block_14_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_68, x = block_14_ffn_rmsnorm)[name = string("block_14_ffn_g")]; + tensor block_14_ffn_g_activation = silu(x = block_14_ffn_g)[name = string("block_14_ffn_g_activation")]; + tensor block_14_ffn_x_gated = mul(x = block_14_ffn_inproj, y = block_14_ffn_g_activation)[name = string("block_14_ffn_x_gated")]; + tensor block_14_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462130560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465399232))))[name = string("block_14_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_69 = constexpr_blockwise_shift_scale(data = block_14_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465427968))))[name = string("constexpr_blockwise_shift_scale_69")]; + tensor block_14_ffn_outproj_strides_0 = const()[name = string("block_14_ffn_outproj_strides_0"), val = tensor([1])]; + string block_14_ffn_outproj_pad_type_0 = const()[name = string("block_14_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_14_ffn_outproj_pad_0 = const()[name = string("block_14_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_14_ffn_outproj_dilations_0 = const()[name = string("block_14_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_14_ffn_outproj_groups_0 = const()[name = string("block_14_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_14_ffn_outproj = conv(dilations = block_14_ffn_outproj_dilations_0, groups = block_14_ffn_outproj_groups_0, pad = block_14_ffn_outproj_pad_0, pad_type = block_14_ffn_outproj_pad_type_0, strides = block_14_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_69, x = block_14_ffn_x_gated)[name = string("block_14_ffn_outproj")]; + tensor block_14_residual_2 = add(x = block_14_ffn_outproj, y = block_14_residual_1)[name = string("block_14_residual_2")]; + tensor block_15_attention_rmsnorm_abs = abs(x = block_14_residual_2)[name = string("block_15_attention_rmsnorm_abs")]; + tensor block_15_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_15_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_15_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_15_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_15_attention_rmsnorm_maxval = reduce_max(axes = block_15_attention_rmsnorm_maxval_axes_0, keep_dims = block_15_attention_rmsnorm_maxval_keep_dims_0, x = block_15_attention_rmsnorm_abs)[name = string("block_15_attention_rmsnorm_maxval")]; + fp16 block_15_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_15_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_15_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_15_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_15_attention_rmsnorm_maxval_clipped = clip(alpha = block_15_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_15_attention_rmsnorm_maxval_clipped_beta_0, x = block_15_attention_rmsnorm_maxval)[name = string("block_15_attention_rmsnorm_maxval_clipped")]; + tensor block_15_attention_rmsnorm_scaled = real_div(x = block_14_residual_2, y = block_15_attention_rmsnorm_maxval_clipped)[name = string("block_15_attention_rmsnorm_scaled")]; + tensor block_15_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_15_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_15_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_15_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_15_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_15_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_15_attention_rmsnorm_squared_sum_keep_dims_0, x = block_15_attention_rmsnorm_scaled)[name = string("block_15_attention_rmsnorm_squared_sum")]; + fp16 block_15_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_15_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_15_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_15_attention_rmsnorm_rsqrt_epsilon_0, x = block_15_attention_rmsnorm_squared_sum)[name = string("block_15_attention_rmsnorm_rsqrt")]; + fp16 block_15_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_15_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_15_attention_rmsnorm_dim_scaled = mul(x = block_15_attention_rmsnorm_scaled, y = block_15_attention_rmsnorm_dim_scaled_y_0)[name = string("block_15_attention_rmsnorm_dim_scaled")]; + tensor block_15_attention_rmsnorm_normalized = mul(x = block_15_attention_rmsnorm_dim_scaled, y = block_15_attention_rmsnorm_rsqrt)[name = string("block_15_attention_rmsnorm_normalized")]; + tensor block_15_attention_rmsnorm_y_0 = const()[name = string("block_15_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465429824)))]; + tensor block_15_attention_rmsnorm = mul(x = block_15_attention_rmsnorm_normalized, y = block_15_attention_rmsnorm_y_0)[name = string("block_15_attention_rmsnorm")]; + tensor attention_15_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465431680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466205888))))[name = string("attention_15_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_70 = constexpr_blockwise_shift_scale(data = attention_15_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466242816))))[name = string("constexpr_blockwise_shift_scale_70")]; + tensor attention_15_qkvproj_bias_0 = const()[name = string("attention_15_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466245184)))]; + tensor attention_15_qkvproj_strides_0 = const()[name = string("attention_15_qkvproj_strides_0"), val = tensor([1])]; + string attention_15_qkvproj_pad_type_0 = const()[name = string("attention_15_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_15_qkvproj_pad_0 = const()[name = string("attention_15_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_15_qkvproj_dilations_0 = const()[name = string("attention_15_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_15_qkvproj_groups_0 = const()[name = string("attention_15_qkvproj_groups_0"), val = int32(1)]; + tensor attention_15_qkvproj = conv(bias = attention_15_qkvproj_bias_0, dilations = attention_15_qkvproj_dilations_0, groups = attention_15_qkvproj_groups_0, pad = attention_15_qkvproj_pad_0, pad_type = attention_15_qkvproj_pad_type_0, strides = attention_15_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_70, x = block_15_attention_rmsnorm)[name = string("attention_15_qkvproj")]; + tensor attention_15_head_reshape_shape_0 = const()[name = string("attention_15_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_15_head_reshape = reshape(shape = attention_15_head_reshape_shape_0, x = attention_15_qkvproj)[name = string("attention_15_head_reshape")]; + tensor attention_15_head_transpose_perm_0 = const()[name = string("attention_15_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_15_split_qkv_heads_axis_0 = const()[name = string("attention_15_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_15_split_qkv_heads_split_sizes_0 = const()[name = string("attention_15_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_15_head_transpose = transpose(perm = attention_15_head_transpose_perm_0, x = attention_15_head_reshape)[name = string("transpose_18")]; + tensor attention_15_split_qkv_heads_0, tensor attention_15_split_qkv_heads_1, tensor attention_15_split_qkv_heads_2 = split(axis = attention_15_split_qkv_heads_axis_0, split_sizes = attention_15_split_qkv_heads_split_sizes_0, x = attention_15_head_transpose)[name = string("attention_15_split_qkv_heads")]; + tensor attention_15_q_rope_lhs_mult = mul(x = attention_15_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_15_q_rope_lhs_mult")]; + int32 attention_15_q_rotate_half_split_num_splits_0 = const()[name = string("attention_15_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_15_q_rotate_half_split_axis_0 = const()[name = string("attention_15_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_15_q_rotate_half_split_0, tensor attention_15_q_rotate_half_split_1 = split(axis = attention_15_q_rotate_half_split_axis_0, num_splits = attention_15_q_rotate_half_split_num_splits_0, x = attention_15_split_qkv_heads_0)[name = string("attention_15_q_rotate_half_split")]; + fp16 attention_15_q_rotate_half_neg_y_0 = const()[name = string("attention_15_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_15_q_rotate_half_neg = mul(x = attention_15_q_rotate_half_split_1, y = attention_15_q_rotate_half_neg_y_0)[name = string("attention_15_q_rotate_half_neg")]; + int32 attention_15_q_rotate_half_concat_axis_0 = const()[name = string("attention_15_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_15_q_rotate_half_concat_interleave_0 = const()[name = string("attention_15_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_15_q_rotate_half_concat = concat(axis = attention_15_q_rotate_half_concat_axis_0, interleave = attention_15_q_rotate_half_concat_interleave_0, values = (attention_15_q_rotate_half_neg, attention_15_q_rotate_half_split_0))[name = string("attention_15_q_rotate_half_concat")]; + tensor attention_15_q_rope_rhs_mult = mul(x = attention_15_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_15_q_rope_rhs_mult")]; + tensor attention_15_q_rope = add(x = attention_15_q_rope_lhs_mult, y = attention_15_q_rope_rhs_mult)[name = string("attention_15_q_rope")]; + tensor attention_15_k_rope_lhs_mult = mul(x = attention_15_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_15_k_rope_lhs_mult")]; + int32 attention_15_k_rotate_half_split_num_splits_0 = const()[name = string("attention_15_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_15_k_rotate_half_split_axis_0 = const()[name = string("attention_15_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_15_k_rotate_half_split_0, tensor attention_15_k_rotate_half_split_1 = split(axis = attention_15_k_rotate_half_split_axis_0, num_splits = attention_15_k_rotate_half_split_num_splits_0, x = attention_15_split_qkv_heads_1)[name = string("attention_15_k_rotate_half_split")]; + fp16 attention_15_k_rotate_half_neg_y_0 = const()[name = string("attention_15_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_15_k_rotate_half_neg = mul(x = attention_15_k_rotate_half_split_1, y = attention_15_k_rotate_half_neg_y_0)[name = string("attention_15_k_rotate_half_neg")]; + int32 attention_15_k_rotate_half_concat_axis_0 = const()[name = string("attention_15_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_15_k_rotate_half_concat_interleave_0 = const()[name = string("attention_15_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_15_k_rotate_half_concat = concat(axis = attention_15_k_rotate_half_concat_axis_0, interleave = attention_15_k_rotate_half_concat_interleave_0, values = (attention_15_k_rotate_half_neg, attention_15_k_rotate_half_split_0))[name = string("attention_15_k_rotate_half_concat")]; + tensor attention_15_k_rope_rhs_mult = mul(x = attention_15_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_15_k_rope_rhs_mult")]; + tensor attention_15_k_rope = add(x = attention_15_k_rope_lhs_mult, y = attention_15_k_rope_rhs_mult)[name = string("attention_15_k_rope")]; + int32 attention_15_q_splits_axis_0 = const()[name = string("attention_15_q_splits_axis_0"), val = int32(1)]; + int32 attention_15_q_splits_num_splits_0 = const()[name = string("attention_15_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_15_q_splits_0, tensor attention_15_q_splits_1 = split(axis = attention_15_q_splits_axis_0, num_splits = attention_15_q_splits_num_splits_0, x = attention_15_q_rope)[name = string("attention_15_q_splits")]; + tensor attention_15_update_begin_0_values0_0 = const()[name = string("attention_15_update_begin_0_values0_0"), val = tensor([15])]; + tensor attention_15_update_begin_0_values1_0 = const()[name = string("attention_15_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_15_update_begin_0_values3_0 = const()[name = string("attention_15_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_15_update_begin_0_axis_0 = const()[name = string("attention_15_update_begin_0_axis_0"), val = int32(0)]; + bool attention_15_update_begin_0_interleave_0 = const()[name = string("attention_15_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_15_update_begin_0 = concat(axis = attention_15_update_begin_0_axis_0, interleave = attention_15_update_begin_0_interleave_0, values = (attention_15_update_begin_0_values0_0, attention_15_update_begin_0_values1_0, query_pos1, attention_15_update_begin_0_values3_0))[name = string("attention_15_update_begin_0")]; + tensor attention_15_update_end_0_values0_0 = const()[name = string("attention_15_update_end_0_values0_0"), val = tensor([16])]; + tensor attention_15_update_end_0_values1_0 = const()[name = string("attention_15_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_15_update_end_0_values3_0 = const()[name = string("attention_15_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_15_update_end_0_axis_0 = const()[name = string("attention_15_update_end_0_axis_0"), val = int32(0)]; + bool attention_15_update_end_0_interleave_0 = const()[name = string("attention_15_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_15_update_end_0 = concat(axis = attention_15_update_end_0_axis_0, interleave = attention_15_update_end_0_interleave_0, values = (attention_15_update_end_0_values0_0, attention_15_update_end_0_values1_0, end_pos_0, attention_15_update_end_0_values3_0))[name = string("attention_15_update_end_0")]; + tensor attention_15_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_15_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_15_updated_key_cache_0 = slice_update(begin = attention_15_update_begin_0, end = attention_15_update_end_0, squeeze_mask = attention_15_updated_key_cache_0_squeeze_mask_0, update = attention_15_k_rope, x = coreml_update_state_28)[name = string("attention_15_updated_key_cache_0")]; + write_state(data = attention_15_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_30_write_state")]; + tensor coreml_update_state_30 = read_state(input = key_cache_state)[name = string("coreml_update_state_30")]; + tensor attention_15_key_cache_begin_0 = const()[name = string("attention_15_key_cache_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor attention_15_key_cache_end_0 = const()[name = string("attention_15_key_cache_end_0"), val = tensor([16, 2, 512, 64])]; + tensor attention_15_key_cache_squeeze_mask_0 = const()[name = string("attention_15_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_15_key_cache = slice_by_index(begin = attention_15_key_cache_begin_0, end = attention_15_key_cache_end_0, squeeze_mask = attention_15_key_cache_squeeze_mask_0, x = coreml_update_state_30)[name = string("attention_15_key_cache")]; + int32 attention_15_key_cache_head_axis_0 = const()[name = string("attention_15_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_15_key_cache_head_num_splits_0 = const()[name = string("attention_15_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_15_key_cache_head_0, tensor attention_15_key_cache_head_1 = split(axis = attention_15_key_cache_head_axis_0, num_splits = attention_15_key_cache_head_num_splits_0, x = attention_15_key_cache)[name = string("attention_15_key_cache_head")]; + tensor attention_15_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_15_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_15_updated_value_cache_0 = slice_update(begin = attention_15_update_begin_0, end = attention_15_update_end_0, squeeze_mask = attention_15_updated_value_cache_0_squeeze_mask_0, update = attention_15_split_qkv_heads_2, x = coreml_update_state_29)[name = string("attention_15_updated_value_cache_0")]; + write_state(data = attention_15_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_31_write_state")]; + tensor coreml_update_state_31 = read_state(input = value_cache_state)[name = string("coreml_update_state_31")]; + tensor attention_15_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_15_slice_current_layer_value_cache_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor attention_15_slice_current_layer_value_cache_end_0 = const()[name = string("attention_15_slice_current_layer_value_cache_end_0"), val = tensor([16, 2, 512, 64])]; + tensor attention_15_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_15_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_15_slice_current_layer_value_cache = slice_by_index(begin = attention_15_slice_current_layer_value_cache_begin_0, end = attention_15_slice_current_layer_value_cache_end_0, squeeze_mask = attention_15_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_31)[name = string("attention_15_slice_current_layer_value_cache")]; + int32 attention_15_slice_value_cache_heads_axis_0 = const()[name = string("attention_15_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_15_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_15_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_15_slice_value_cache_heads_0, tensor attention_15_slice_value_cache_heads_1 = split(axis = attention_15_slice_value_cache_heads_axis_0, num_splits = attention_15_slice_value_cache_heads_num_splits_0, x = attention_15_slice_current_layer_value_cache)[name = string("attention_15_slice_value_cache_heads")]; + bool attention_15_scores_0_transpose_y_0 = const()[name = string("attention_15_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_15_scores_0_transpose_x_0 = const()[name = string("attention_15_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_15_scores_0 = matmul(transpose_x = attention_15_scores_0_transpose_x_0, transpose_y = attention_15_scores_0_transpose_y_0, x = attention_15_key_cache_head_0, y = attention_15_q_splits_0)[name = string("attention_15_scores_0")]; + fp16 attention_15_scaled_scores_0_y_0 = const()[name = string("attention_15_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_15_scaled_scores_0 = mul(x = attention_15_scores_0, y = attention_15_scaled_scores_0_y_0)[name = string("attention_15_scaled_scores_0")]; + tensor attention_15_masked_scaled_scores_0 = add(x = attention_15_scaled_scores_0, y = transpose_0)[name = string("attention_15_masked_scaled_scores_0")]; + int32 softmax_30_axis_0 = const()[name = string("softmax_30_axis_0"), val = int32(-2)]; + tensor softmax_30 = softmax(axis = softmax_30_axis_0, x = attention_15_masked_scaled_scores_0)[name = string("softmax_30")]; + bool attention_15_attention_0_transpose_x_0 = const()[name = string("attention_15_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_15_attention_0_transpose_y_0 = const()[name = string("attention_15_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_15_attention_0 = matmul(transpose_x = attention_15_attention_0_transpose_x_0, transpose_y = attention_15_attention_0_transpose_y_0, x = softmax_30, y = attention_15_slice_value_cache_heads_0)[name = string("attention_15_attention_0")]; + bool attention_15_scores_1_transpose_y_0 = const()[name = string("attention_15_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_15_scores_1_transpose_x_0 = const()[name = string("attention_15_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_15_scores_1 = matmul(transpose_x = attention_15_scores_1_transpose_x_0, transpose_y = attention_15_scores_1_transpose_y_0, x = attention_15_key_cache_head_1, y = attention_15_q_splits_1)[name = string("attention_15_scores_1")]; + fp16 attention_15_scaled_scores_1_y_0 = const()[name = string("attention_15_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_15_scaled_scores_1 = mul(x = attention_15_scores_1, y = attention_15_scaled_scores_1_y_0)[name = string("attention_15_scaled_scores_1")]; + tensor attention_15_masked_scaled_scores_1 = add(x = attention_15_scaled_scores_1, y = transpose_0)[name = string("attention_15_masked_scaled_scores_1")]; + int32 softmax_31_axis_0 = const()[name = string("softmax_31_axis_0"), val = int32(-2)]; + tensor softmax_31 = softmax(axis = softmax_31_axis_0, x = attention_15_masked_scaled_scores_1)[name = string("softmax_31")]; + bool attention_15_attention_1_transpose_x_0 = const()[name = string("attention_15_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_15_attention_1_transpose_y_0 = const()[name = string("attention_15_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_15_attention_1 = matmul(transpose_x = attention_15_attention_1_transpose_x_0, transpose_y = attention_15_attention_1_transpose_y_0, x = softmax_31, y = attention_15_slice_value_cache_heads_1)[name = string("attention_15_attention_1")]; + int32 attention_15_concat_attention_all_heads_axis_0 = const()[name = string("attention_15_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_15_concat_attention_all_heads_interleave_0 = const()[name = string("attention_15_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_15_concat_attention_all_heads = concat(axis = attention_15_concat_attention_all_heads_axis_0, interleave = attention_15_concat_attention_all_heads_interleave_0, values = (attention_15_attention_0, attention_15_attention_1))[name = string("attention_15_concat_attention_all_heads")]; + tensor attention_15_channels_first_retransposed_perm_0 = const()[name = string("attention_15_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_15_reshaped_shape_0 = const()[name = string("attention_15_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_15_channels_first_retransposed = transpose(perm = attention_15_channels_first_retransposed_perm_0, x = attention_15_concat_attention_all_heads)[name = string("transpose_17")]; + tensor attention_15_reshaped = reshape(shape = attention_15_reshaped_shape_0, x = attention_15_channels_first_retransposed)[name = string("attention_15_reshaped")]; + tensor attention_15_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466247552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466849728))))[name = string("attention_15_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_71 = constexpr_blockwise_shift_scale(data = attention_15_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466878464))))[name = string("constexpr_blockwise_shift_scale_71")]; + tensor attention_15_outproj_strides_0 = const()[name = string("attention_15_outproj_strides_0"), val = tensor([1])]; + string attention_15_outproj_pad_type_0 = const()[name = string("attention_15_outproj_pad_type_0"), val = string("valid")]; + tensor attention_15_outproj_pad_0 = const()[name = string("attention_15_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_15_outproj_dilations_0 = const()[name = string("attention_15_outproj_dilations_0"), val = tensor([1])]; + int32 attention_15_outproj_groups_0 = const()[name = string("attention_15_outproj_groups_0"), val = int32(1)]; + tensor attention_15_outproj = conv(dilations = attention_15_outproj_dilations_0, groups = attention_15_outproj_groups_0, pad = attention_15_outproj_pad_0, pad_type = attention_15_outproj_pad_type_0, strides = attention_15_outproj_strides_0, weight = constexpr_blockwise_shift_scale_71, x = attention_15_reshaped)[name = string("attention_15_outproj")]; + tensor block_15_residual_1 = add(x = block_14_residual_2, y = attention_15_outproj)[name = string("block_15_residual_1")]; + tensor block_15_ffn_rmsnorm_abs = abs(x = block_15_residual_1)[name = string("block_15_ffn_rmsnorm_abs")]; + tensor block_15_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_15_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_15_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_15_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_15_ffn_rmsnorm_maxval = reduce_max(axes = block_15_ffn_rmsnorm_maxval_axes_0, keep_dims = block_15_ffn_rmsnorm_maxval_keep_dims_0, x = block_15_ffn_rmsnorm_abs)[name = string("block_15_ffn_rmsnorm_maxval")]; + fp16 block_15_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_15_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_15_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_15_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_15_ffn_rmsnorm_maxval_clipped = clip(alpha = block_15_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_15_ffn_rmsnorm_maxval_clipped_beta_0, x = block_15_ffn_rmsnorm_maxval)[name = string("block_15_ffn_rmsnorm_maxval_clipped")]; + tensor block_15_ffn_rmsnorm_scaled = real_div(x = block_15_residual_1, y = block_15_ffn_rmsnorm_maxval_clipped)[name = string("block_15_ffn_rmsnorm_scaled")]; + tensor block_15_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_15_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_15_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_15_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_15_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_15_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_15_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_15_ffn_rmsnorm_scaled)[name = string("block_15_ffn_rmsnorm_squared_sum")]; + fp16 block_15_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_15_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_15_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_15_ffn_rmsnorm_rsqrt_epsilon_0, x = block_15_ffn_rmsnorm_squared_sum)[name = string("block_15_ffn_rmsnorm_rsqrt")]; + fp16 block_15_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_15_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_15_ffn_rmsnorm_dim_scaled = mul(x = block_15_ffn_rmsnorm_scaled, y = block_15_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_15_ffn_rmsnorm_dim_scaled")]; + tensor block_15_ffn_rmsnorm_normalized = mul(x = block_15_ffn_rmsnorm_dim_scaled, y = block_15_ffn_rmsnorm_rsqrt)[name = string("block_15_ffn_rmsnorm_normalized")]; + tensor block_15_ffn_rmsnorm_y_0 = const()[name = string("block_15_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466880320)))]; + tensor block_15_ffn_rmsnorm = mul(x = block_15_ffn_rmsnorm_normalized, y = block_15_ffn_rmsnorm_y_0)[name = string("block_15_ffn_rmsnorm")]; + tensor block_15_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466882176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470150848))))[name = string("block_15_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_72 = constexpr_blockwise_shift_scale(data = block_15_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470306560))))[name = string("constexpr_blockwise_shift_scale_72")]; + tensor block_15_ffn_inproj_strides_0 = const()[name = string("block_15_ffn_inproj_strides_0"), val = tensor([1])]; + string block_15_ffn_inproj_pad_type_0 = const()[name = string("block_15_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_15_ffn_inproj_pad_0 = const()[name = string("block_15_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_15_ffn_inproj_dilations_0 = const()[name = string("block_15_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_15_ffn_inproj_groups_0 = const()[name = string("block_15_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_15_ffn_inproj = conv(dilations = block_15_ffn_inproj_dilations_0, groups = block_15_ffn_inproj_groups_0, pad = block_15_ffn_inproj_pad_0, pad_type = block_15_ffn_inproj_pad_type_0, strides = block_15_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_72, x = block_15_ffn_rmsnorm)[name = string("block_15_ffn_inproj")]; + tensor block_15_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470316352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473585024))))[name = string("block_15_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_73 = constexpr_blockwise_shift_scale(data = block_15_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473740736))))[name = string("constexpr_blockwise_shift_scale_73")]; + tensor block_15_ffn_g_strides_0 = const()[name = string("block_15_ffn_g_strides_0"), val = tensor([1])]; + string block_15_ffn_g_pad_type_0 = const()[name = string("block_15_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_15_ffn_g_pad_0 = const()[name = string("block_15_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_15_ffn_g_dilations_0 = const()[name = string("block_15_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_15_ffn_g_groups_0 = const()[name = string("block_15_ffn_g_groups_0"), val = int32(1)]; + tensor block_15_ffn_g = conv(dilations = block_15_ffn_g_dilations_0, groups = block_15_ffn_g_groups_0, pad = block_15_ffn_g_pad_0, pad_type = block_15_ffn_g_pad_type_0, strides = block_15_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_73, x = block_15_ffn_rmsnorm)[name = string("block_15_ffn_g")]; + tensor block_15_ffn_g_activation = silu(x = block_15_ffn_g)[name = string("block_15_ffn_g_activation")]; + tensor block_15_ffn_x_gated = mul(x = block_15_ffn_inproj, y = block_15_ffn_g_activation)[name = string("block_15_ffn_x_gated")]; + tensor block_15_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473750528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477019200))))[name = string("block_15_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_74 = constexpr_blockwise_shift_scale(data = block_15_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477047936))))[name = string("constexpr_blockwise_shift_scale_74")]; + tensor block_15_ffn_outproj_strides_0 = const()[name = string("block_15_ffn_outproj_strides_0"), val = tensor([1])]; + string block_15_ffn_outproj_pad_type_0 = const()[name = string("block_15_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_15_ffn_outproj_pad_0 = const()[name = string("block_15_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_15_ffn_outproj_dilations_0 = const()[name = string("block_15_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_15_ffn_outproj_groups_0 = const()[name = string("block_15_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_15_ffn_outproj = conv(dilations = block_15_ffn_outproj_dilations_0, groups = block_15_ffn_outproj_groups_0, pad = block_15_ffn_outproj_pad_0, pad_type = block_15_ffn_outproj_pad_type_0, strides = block_15_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_74, x = block_15_ffn_x_gated)[name = string("block_15_ffn_outproj")]; + tensor block_15_residual_2 = add(x = block_15_ffn_outproj, y = block_15_residual_1)[name = string("block_15_residual_2")]; + tensor block_16_attention_rmsnorm_abs = abs(x = block_15_residual_2)[name = string("block_16_attention_rmsnorm_abs")]; + tensor block_16_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_16_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_16_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_16_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_16_attention_rmsnorm_maxval = reduce_max(axes = block_16_attention_rmsnorm_maxval_axes_0, keep_dims = block_16_attention_rmsnorm_maxval_keep_dims_0, x = block_16_attention_rmsnorm_abs)[name = string("block_16_attention_rmsnorm_maxval")]; + fp16 block_16_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_16_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_16_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_16_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_16_attention_rmsnorm_maxval_clipped = clip(alpha = block_16_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_16_attention_rmsnorm_maxval_clipped_beta_0, x = block_16_attention_rmsnorm_maxval)[name = string("block_16_attention_rmsnorm_maxval_clipped")]; + tensor block_16_attention_rmsnorm_scaled = real_div(x = block_15_residual_2, y = block_16_attention_rmsnorm_maxval_clipped)[name = string("block_16_attention_rmsnorm_scaled")]; + tensor block_16_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_16_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_16_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_16_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_16_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_16_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_16_attention_rmsnorm_squared_sum_keep_dims_0, x = block_16_attention_rmsnorm_scaled)[name = string("block_16_attention_rmsnorm_squared_sum")]; + fp16 block_16_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_16_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_16_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_16_attention_rmsnorm_rsqrt_epsilon_0, x = block_16_attention_rmsnorm_squared_sum)[name = string("block_16_attention_rmsnorm_rsqrt")]; + fp16 block_16_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_16_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_16_attention_rmsnorm_dim_scaled = mul(x = block_16_attention_rmsnorm_scaled, y = block_16_attention_rmsnorm_dim_scaled_y_0)[name = string("block_16_attention_rmsnorm_dim_scaled")]; + tensor block_16_attention_rmsnorm_normalized = mul(x = block_16_attention_rmsnorm_dim_scaled, y = block_16_attention_rmsnorm_rsqrt)[name = string("block_16_attention_rmsnorm_normalized")]; + tensor block_16_attention_rmsnorm_y_0 = const()[name = string("block_16_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477049792)))]; + tensor block_16_attention_rmsnorm = mul(x = block_16_attention_rmsnorm_normalized, y = block_16_attention_rmsnorm_y_0)[name = string("block_16_attention_rmsnorm")]; + tensor attention_16_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477051648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477825856))))[name = string("attention_16_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_75 = constexpr_blockwise_shift_scale(data = attention_16_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477862784))))[name = string("constexpr_blockwise_shift_scale_75")]; + tensor attention_16_qkvproj_bias_0 = const()[name = string("attention_16_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477865152)))]; + tensor attention_16_qkvproj_strides_0 = const()[name = string("attention_16_qkvproj_strides_0"), val = tensor([1])]; + string attention_16_qkvproj_pad_type_0 = const()[name = string("attention_16_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_16_qkvproj_pad_0 = const()[name = string("attention_16_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_16_qkvproj_dilations_0 = const()[name = string("attention_16_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_16_qkvproj_groups_0 = const()[name = string("attention_16_qkvproj_groups_0"), val = int32(1)]; + tensor attention_16_qkvproj = conv(bias = attention_16_qkvproj_bias_0, dilations = attention_16_qkvproj_dilations_0, groups = attention_16_qkvproj_groups_0, pad = attention_16_qkvproj_pad_0, pad_type = attention_16_qkvproj_pad_type_0, strides = attention_16_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_75, x = block_16_attention_rmsnorm)[name = string("attention_16_qkvproj")]; + tensor attention_16_head_reshape_shape_0 = const()[name = string("attention_16_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_16_head_reshape = reshape(shape = attention_16_head_reshape_shape_0, x = attention_16_qkvproj)[name = string("attention_16_head_reshape")]; + tensor attention_16_head_transpose_perm_0 = const()[name = string("attention_16_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_16_split_qkv_heads_axis_0 = const()[name = string("attention_16_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_16_split_qkv_heads_split_sizes_0 = const()[name = string("attention_16_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_16_head_transpose = transpose(perm = attention_16_head_transpose_perm_0, x = attention_16_head_reshape)[name = string("transpose_16")]; + tensor attention_16_split_qkv_heads_0, tensor attention_16_split_qkv_heads_1, tensor attention_16_split_qkv_heads_2 = split(axis = attention_16_split_qkv_heads_axis_0, split_sizes = attention_16_split_qkv_heads_split_sizes_0, x = attention_16_head_transpose)[name = string("attention_16_split_qkv_heads")]; + tensor attention_16_q_rope_lhs_mult = mul(x = attention_16_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_16_q_rope_lhs_mult")]; + int32 attention_16_q_rotate_half_split_num_splits_0 = const()[name = string("attention_16_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_16_q_rotate_half_split_axis_0 = const()[name = string("attention_16_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_16_q_rotate_half_split_0, tensor attention_16_q_rotate_half_split_1 = split(axis = attention_16_q_rotate_half_split_axis_0, num_splits = attention_16_q_rotate_half_split_num_splits_0, x = attention_16_split_qkv_heads_0)[name = string("attention_16_q_rotate_half_split")]; + fp16 attention_16_q_rotate_half_neg_y_0 = const()[name = string("attention_16_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_16_q_rotate_half_neg = mul(x = attention_16_q_rotate_half_split_1, y = attention_16_q_rotate_half_neg_y_0)[name = string("attention_16_q_rotate_half_neg")]; + int32 attention_16_q_rotate_half_concat_axis_0 = const()[name = string("attention_16_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_16_q_rotate_half_concat_interleave_0 = const()[name = string("attention_16_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_16_q_rotate_half_concat = concat(axis = attention_16_q_rotate_half_concat_axis_0, interleave = attention_16_q_rotate_half_concat_interleave_0, values = (attention_16_q_rotate_half_neg, attention_16_q_rotate_half_split_0))[name = string("attention_16_q_rotate_half_concat")]; + tensor attention_16_q_rope_rhs_mult = mul(x = attention_16_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_16_q_rope_rhs_mult")]; + tensor attention_16_q_rope = add(x = attention_16_q_rope_lhs_mult, y = attention_16_q_rope_rhs_mult)[name = string("attention_16_q_rope")]; + tensor attention_16_k_rope_lhs_mult = mul(x = attention_16_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_16_k_rope_lhs_mult")]; + int32 attention_16_k_rotate_half_split_num_splits_0 = const()[name = string("attention_16_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_16_k_rotate_half_split_axis_0 = const()[name = string("attention_16_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_16_k_rotate_half_split_0, tensor attention_16_k_rotate_half_split_1 = split(axis = attention_16_k_rotate_half_split_axis_0, num_splits = attention_16_k_rotate_half_split_num_splits_0, x = attention_16_split_qkv_heads_1)[name = string("attention_16_k_rotate_half_split")]; + fp16 attention_16_k_rotate_half_neg_y_0 = const()[name = string("attention_16_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_16_k_rotate_half_neg = mul(x = attention_16_k_rotate_half_split_1, y = attention_16_k_rotate_half_neg_y_0)[name = string("attention_16_k_rotate_half_neg")]; + int32 attention_16_k_rotate_half_concat_axis_0 = const()[name = string("attention_16_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_16_k_rotate_half_concat_interleave_0 = const()[name = string("attention_16_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_16_k_rotate_half_concat = concat(axis = attention_16_k_rotate_half_concat_axis_0, interleave = attention_16_k_rotate_half_concat_interleave_0, values = (attention_16_k_rotate_half_neg, attention_16_k_rotate_half_split_0))[name = string("attention_16_k_rotate_half_concat")]; + tensor attention_16_k_rope_rhs_mult = mul(x = attention_16_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_16_k_rope_rhs_mult")]; + tensor attention_16_k_rope = add(x = attention_16_k_rope_lhs_mult, y = attention_16_k_rope_rhs_mult)[name = string("attention_16_k_rope")]; + int32 attention_16_q_splits_axis_0 = const()[name = string("attention_16_q_splits_axis_0"), val = int32(1)]; + int32 attention_16_q_splits_num_splits_0 = const()[name = string("attention_16_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_16_q_splits_0, tensor attention_16_q_splits_1 = split(axis = attention_16_q_splits_axis_0, num_splits = attention_16_q_splits_num_splits_0, x = attention_16_q_rope)[name = string("attention_16_q_splits")]; + tensor attention_16_update_begin_0_values0_0 = const()[name = string("attention_16_update_begin_0_values0_0"), val = tensor([16])]; + tensor attention_16_update_begin_0_values1_0 = const()[name = string("attention_16_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_16_update_begin_0_values3_0 = const()[name = string("attention_16_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_16_update_begin_0_axis_0 = const()[name = string("attention_16_update_begin_0_axis_0"), val = int32(0)]; + bool attention_16_update_begin_0_interleave_0 = const()[name = string("attention_16_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_16_update_begin_0 = concat(axis = attention_16_update_begin_0_axis_0, interleave = attention_16_update_begin_0_interleave_0, values = (attention_16_update_begin_0_values0_0, attention_16_update_begin_0_values1_0, query_pos1, attention_16_update_begin_0_values3_0))[name = string("attention_16_update_begin_0")]; + tensor attention_16_update_end_0_values0_0 = const()[name = string("attention_16_update_end_0_values0_0"), val = tensor([17])]; + tensor attention_16_update_end_0_values1_0 = const()[name = string("attention_16_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_16_update_end_0_values3_0 = const()[name = string("attention_16_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_16_update_end_0_axis_0 = const()[name = string("attention_16_update_end_0_axis_0"), val = int32(0)]; + bool attention_16_update_end_0_interleave_0 = const()[name = string("attention_16_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_16_update_end_0 = concat(axis = attention_16_update_end_0_axis_0, interleave = attention_16_update_end_0_interleave_0, values = (attention_16_update_end_0_values0_0, attention_16_update_end_0_values1_0, end_pos_0, attention_16_update_end_0_values3_0))[name = string("attention_16_update_end_0")]; + tensor attention_16_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_16_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_16_updated_key_cache_0 = slice_update(begin = attention_16_update_begin_0, end = attention_16_update_end_0, squeeze_mask = attention_16_updated_key_cache_0_squeeze_mask_0, update = attention_16_k_rope, x = coreml_update_state_30)[name = string("attention_16_updated_key_cache_0")]; + write_state(data = attention_16_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_32_write_state")]; + tensor coreml_update_state_32 = read_state(input = key_cache_state)[name = string("coreml_update_state_32")]; + tensor attention_16_key_cache_begin_0 = const()[name = string("attention_16_key_cache_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor attention_16_key_cache_end_0 = const()[name = string("attention_16_key_cache_end_0"), val = tensor([17, 2, 512, 64])]; + tensor attention_16_key_cache_squeeze_mask_0 = const()[name = string("attention_16_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_16_key_cache = slice_by_index(begin = attention_16_key_cache_begin_0, end = attention_16_key_cache_end_0, squeeze_mask = attention_16_key_cache_squeeze_mask_0, x = coreml_update_state_32)[name = string("attention_16_key_cache")]; + int32 attention_16_key_cache_head_axis_0 = const()[name = string("attention_16_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_16_key_cache_head_num_splits_0 = const()[name = string("attention_16_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_16_key_cache_head_0, tensor attention_16_key_cache_head_1 = split(axis = attention_16_key_cache_head_axis_0, num_splits = attention_16_key_cache_head_num_splits_0, x = attention_16_key_cache)[name = string("attention_16_key_cache_head")]; + tensor attention_16_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_16_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_16_updated_value_cache_0 = slice_update(begin = attention_16_update_begin_0, end = attention_16_update_end_0, squeeze_mask = attention_16_updated_value_cache_0_squeeze_mask_0, update = attention_16_split_qkv_heads_2, x = coreml_update_state_31)[name = string("attention_16_updated_value_cache_0")]; + write_state(data = attention_16_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_33_write_state")]; + tensor coreml_update_state_33 = read_state(input = value_cache_state)[name = string("coreml_update_state_33")]; + tensor attention_16_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_16_slice_current_layer_value_cache_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor attention_16_slice_current_layer_value_cache_end_0 = const()[name = string("attention_16_slice_current_layer_value_cache_end_0"), val = tensor([17, 2, 512, 64])]; + tensor attention_16_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_16_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_16_slice_current_layer_value_cache = slice_by_index(begin = attention_16_slice_current_layer_value_cache_begin_0, end = attention_16_slice_current_layer_value_cache_end_0, squeeze_mask = attention_16_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_33)[name = string("attention_16_slice_current_layer_value_cache")]; + int32 attention_16_slice_value_cache_heads_axis_0 = const()[name = string("attention_16_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_16_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_16_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_16_slice_value_cache_heads_0, tensor attention_16_slice_value_cache_heads_1 = split(axis = attention_16_slice_value_cache_heads_axis_0, num_splits = attention_16_slice_value_cache_heads_num_splits_0, x = attention_16_slice_current_layer_value_cache)[name = string("attention_16_slice_value_cache_heads")]; + bool attention_16_scores_0_transpose_y_0 = const()[name = string("attention_16_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_16_scores_0_transpose_x_0 = const()[name = string("attention_16_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_16_scores_0 = matmul(transpose_x = attention_16_scores_0_transpose_x_0, transpose_y = attention_16_scores_0_transpose_y_0, x = attention_16_key_cache_head_0, y = attention_16_q_splits_0)[name = string("attention_16_scores_0")]; + fp16 attention_16_scaled_scores_0_y_0 = const()[name = string("attention_16_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_16_scaled_scores_0 = mul(x = attention_16_scores_0, y = attention_16_scaled_scores_0_y_0)[name = string("attention_16_scaled_scores_0")]; + tensor attention_16_masked_scaled_scores_0 = add(x = attention_16_scaled_scores_0, y = transpose_0)[name = string("attention_16_masked_scaled_scores_0")]; + int32 softmax_32_axis_0 = const()[name = string("softmax_32_axis_0"), val = int32(-2)]; + tensor softmax_32 = softmax(axis = softmax_32_axis_0, x = attention_16_masked_scaled_scores_0)[name = string("softmax_32")]; + bool attention_16_attention_0_transpose_x_0 = const()[name = string("attention_16_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_16_attention_0_transpose_y_0 = const()[name = string("attention_16_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_16_attention_0 = matmul(transpose_x = attention_16_attention_0_transpose_x_0, transpose_y = attention_16_attention_0_transpose_y_0, x = softmax_32, y = attention_16_slice_value_cache_heads_0)[name = string("attention_16_attention_0")]; + bool attention_16_scores_1_transpose_y_0 = const()[name = string("attention_16_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_16_scores_1_transpose_x_0 = const()[name = string("attention_16_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_16_scores_1 = matmul(transpose_x = attention_16_scores_1_transpose_x_0, transpose_y = attention_16_scores_1_transpose_y_0, x = attention_16_key_cache_head_1, y = attention_16_q_splits_1)[name = string("attention_16_scores_1")]; + fp16 attention_16_scaled_scores_1_y_0 = const()[name = string("attention_16_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_16_scaled_scores_1 = mul(x = attention_16_scores_1, y = attention_16_scaled_scores_1_y_0)[name = string("attention_16_scaled_scores_1")]; + tensor attention_16_masked_scaled_scores_1 = add(x = attention_16_scaled_scores_1, y = transpose_0)[name = string("attention_16_masked_scaled_scores_1")]; + int32 softmax_33_axis_0 = const()[name = string("softmax_33_axis_0"), val = int32(-2)]; + tensor softmax_33 = softmax(axis = softmax_33_axis_0, x = attention_16_masked_scaled_scores_1)[name = string("softmax_33")]; + bool attention_16_attention_1_transpose_x_0 = const()[name = string("attention_16_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_16_attention_1_transpose_y_0 = const()[name = string("attention_16_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_16_attention_1 = matmul(transpose_x = attention_16_attention_1_transpose_x_0, transpose_y = attention_16_attention_1_transpose_y_0, x = softmax_33, y = attention_16_slice_value_cache_heads_1)[name = string("attention_16_attention_1")]; + int32 attention_16_concat_attention_all_heads_axis_0 = const()[name = string("attention_16_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_16_concat_attention_all_heads_interleave_0 = const()[name = string("attention_16_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_16_concat_attention_all_heads = concat(axis = attention_16_concat_attention_all_heads_axis_0, interleave = attention_16_concat_attention_all_heads_interleave_0, values = (attention_16_attention_0, attention_16_attention_1))[name = string("attention_16_concat_attention_all_heads")]; + tensor attention_16_channels_first_retransposed_perm_0 = const()[name = string("attention_16_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_16_reshaped_shape_0 = const()[name = string("attention_16_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_16_channels_first_retransposed = transpose(perm = attention_16_channels_first_retransposed_perm_0, x = attention_16_concat_attention_all_heads)[name = string("transpose_15")]; + tensor attention_16_reshaped = reshape(shape = attention_16_reshaped_shape_0, x = attention_16_channels_first_retransposed)[name = string("attention_16_reshaped")]; + tensor attention_16_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477867520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478469696))))[name = string("attention_16_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_76 = constexpr_blockwise_shift_scale(data = attention_16_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478498432))))[name = string("constexpr_blockwise_shift_scale_76")]; + tensor attention_16_outproj_strides_0 = const()[name = string("attention_16_outproj_strides_0"), val = tensor([1])]; + string attention_16_outproj_pad_type_0 = const()[name = string("attention_16_outproj_pad_type_0"), val = string("valid")]; + tensor attention_16_outproj_pad_0 = const()[name = string("attention_16_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_16_outproj_dilations_0 = const()[name = string("attention_16_outproj_dilations_0"), val = tensor([1])]; + int32 attention_16_outproj_groups_0 = const()[name = string("attention_16_outproj_groups_0"), val = int32(1)]; + tensor attention_16_outproj = conv(dilations = attention_16_outproj_dilations_0, groups = attention_16_outproj_groups_0, pad = attention_16_outproj_pad_0, pad_type = attention_16_outproj_pad_type_0, strides = attention_16_outproj_strides_0, weight = constexpr_blockwise_shift_scale_76, x = attention_16_reshaped)[name = string("attention_16_outproj")]; + tensor block_16_residual_1 = add(x = block_15_residual_2, y = attention_16_outproj)[name = string("block_16_residual_1")]; + tensor block_16_ffn_rmsnorm_abs = abs(x = block_16_residual_1)[name = string("block_16_ffn_rmsnorm_abs")]; + tensor block_16_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_16_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_16_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_16_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_16_ffn_rmsnorm_maxval = reduce_max(axes = block_16_ffn_rmsnorm_maxval_axes_0, keep_dims = block_16_ffn_rmsnorm_maxval_keep_dims_0, x = block_16_ffn_rmsnorm_abs)[name = string("block_16_ffn_rmsnorm_maxval")]; + fp16 block_16_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_16_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_16_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_16_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_16_ffn_rmsnorm_maxval_clipped = clip(alpha = block_16_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_16_ffn_rmsnorm_maxval_clipped_beta_0, x = block_16_ffn_rmsnorm_maxval)[name = string("block_16_ffn_rmsnorm_maxval_clipped")]; + tensor block_16_ffn_rmsnorm_scaled = real_div(x = block_16_residual_1, y = block_16_ffn_rmsnorm_maxval_clipped)[name = string("block_16_ffn_rmsnorm_scaled")]; + tensor block_16_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_16_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_16_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_16_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_16_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_16_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_16_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_16_ffn_rmsnorm_scaled)[name = string("block_16_ffn_rmsnorm_squared_sum")]; + fp16 block_16_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_16_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_16_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_16_ffn_rmsnorm_rsqrt_epsilon_0, x = block_16_ffn_rmsnorm_squared_sum)[name = string("block_16_ffn_rmsnorm_rsqrt")]; + fp16 block_16_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_16_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_16_ffn_rmsnorm_dim_scaled = mul(x = block_16_ffn_rmsnorm_scaled, y = block_16_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_16_ffn_rmsnorm_dim_scaled")]; + tensor block_16_ffn_rmsnorm_normalized = mul(x = block_16_ffn_rmsnorm_dim_scaled, y = block_16_ffn_rmsnorm_rsqrt)[name = string("block_16_ffn_rmsnorm_normalized")]; + tensor block_16_ffn_rmsnorm_y_0 = const()[name = string("block_16_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478500288)))]; + tensor block_16_ffn_rmsnorm = mul(x = block_16_ffn_rmsnorm_normalized, y = block_16_ffn_rmsnorm_y_0)[name = string("block_16_ffn_rmsnorm")]; + tensor block_16_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478502144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481770816))))[name = string("block_16_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_77 = constexpr_blockwise_shift_scale(data = block_16_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481926528))))[name = string("constexpr_blockwise_shift_scale_77")]; + tensor block_16_ffn_inproj_strides_0 = const()[name = string("block_16_ffn_inproj_strides_0"), val = tensor([1])]; + string block_16_ffn_inproj_pad_type_0 = const()[name = string("block_16_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_16_ffn_inproj_pad_0 = const()[name = string("block_16_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_16_ffn_inproj_dilations_0 = const()[name = string("block_16_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_16_ffn_inproj_groups_0 = const()[name = string("block_16_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_16_ffn_inproj = conv(dilations = block_16_ffn_inproj_dilations_0, groups = block_16_ffn_inproj_groups_0, pad = block_16_ffn_inproj_pad_0, pad_type = block_16_ffn_inproj_pad_type_0, strides = block_16_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_77, x = block_16_ffn_rmsnorm)[name = string("block_16_ffn_inproj")]; + tensor block_16_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481936320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485204992))))[name = string("block_16_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_78 = constexpr_blockwise_shift_scale(data = block_16_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485360704))))[name = string("constexpr_blockwise_shift_scale_78")]; + tensor block_16_ffn_g_strides_0 = const()[name = string("block_16_ffn_g_strides_0"), val = tensor([1])]; + string block_16_ffn_g_pad_type_0 = const()[name = string("block_16_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_16_ffn_g_pad_0 = const()[name = string("block_16_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_16_ffn_g_dilations_0 = const()[name = string("block_16_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_16_ffn_g_groups_0 = const()[name = string("block_16_ffn_g_groups_0"), val = int32(1)]; + tensor block_16_ffn_g = conv(dilations = block_16_ffn_g_dilations_0, groups = block_16_ffn_g_groups_0, pad = block_16_ffn_g_pad_0, pad_type = block_16_ffn_g_pad_type_0, strides = block_16_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_78, x = block_16_ffn_rmsnorm)[name = string("block_16_ffn_g")]; + tensor block_16_ffn_g_activation = silu(x = block_16_ffn_g)[name = string("block_16_ffn_g_activation")]; + tensor block_16_ffn_x_gated = mul(x = block_16_ffn_inproj, y = block_16_ffn_g_activation)[name = string("block_16_ffn_x_gated")]; + tensor block_16_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485370496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488639168))))[name = string("block_16_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_79 = constexpr_blockwise_shift_scale(data = block_16_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488667904))))[name = string("constexpr_blockwise_shift_scale_79")]; + tensor block_16_ffn_outproj_strides_0 = const()[name = string("block_16_ffn_outproj_strides_0"), val = tensor([1])]; + string block_16_ffn_outproj_pad_type_0 = const()[name = string("block_16_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_16_ffn_outproj_pad_0 = const()[name = string("block_16_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_16_ffn_outproj_dilations_0 = const()[name = string("block_16_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_16_ffn_outproj_groups_0 = const()[name = string("block_16_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_16_ffn_outproj = conv(dilations = block_16_ffn_outproj_dilations_0, groups = block_16_ffn_outproj_groups_0, pad = block_16_ffn_outproj_pad_0, pad_type = block_16_ffn_outproj_pad_type_0, strides = block_16_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_79, x = block_16_ffn_x_gated)[name = string("block_16_ffn_outproj")]; + tensor block_16_residual_2 = add(x = block_16_ffn_outproj, y = block_16_residual_1)[name = string("block_16_residual_2")]; + tensor block_17_attention_rmsnorm_abs = abs(x = block_16_residual_2)[name = string("block_17_attention_rmsnorm_abs")]; + tensor block_17_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_17_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_17_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_17_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_17_attention_rmsnorm_maxval = reduce_max(axes = block_17_attention_rmsnorm_maxval_axes_0, keep_dims = block_17_attention_rmsnorm_maxval_keep_dims_0, x = block_17_attention_rmsnorm_abs)[name = string("block_17_attention_rmsnorm_maxval")]; + fp16 block_17_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_17_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_17_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_17_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_17_attention_rmsnorm_maxval_clipped = clip(alpha = block_17_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_17_attention_rmsnorm_maxval_clipped_beta_0, x = block_17_attention_rmsnorm_maxval)[name = string("block_17_attention_rmsnorm_maxval_clipped")]; + tensor block_17_attention_rmsnorm_scaled = real_div(x = block_16_residual_2, y = block_17_attention_rmsnorm_maxval_clipped)[name = string("block_17_attention_rmsnorm_scaled")]; + tensor block_17_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_17_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_17_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_17_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_17_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_17_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_17_attention_rmsnorm_squared_sum_keep_dims_0, x = block_17_attention_rmsnorm_scaled)[name = string("block_17_attention_rmsnorm_squared_sum")]; + fp16 block_17_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_17_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_17_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_17_attention_rmsnorm_rsqrt_epsilon_0, x = block_17_attention_rmsnorm_squared_sum)[name = string("block_17_attention_rmsnorm_rsqrt")]; + fp16 block_17_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_17_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_17_attention_rmsnorm_dim_scaled = mul(x = block_17_attention_rmsnorm_scaled, y = block_17_attention_rmsnorm_dim_scaled_y_0)[name = string("block_17_attention_rmsnorm_dim_scaled")]; + tensor block_17_attention_rmsnorm_normalized = mul(x = block_17_attention_rmsnorm_dim_scaled, y = block_17_attention_rmsnorm_rsqrt)[name = string("block_17_attention_rmsnorm_normalized")]; + tensor block_17_attention_rmsnorm_y_0 = const()[name = string("block_17_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488669760)))]; + tensor block_17_attention_rmsnorm = mul(x = block_17_attention_rmsnorm_normalized, y = block_17_attention_rmsnorm_y_0)[name = string("block_17_attention_rmsnorm")]; + tensor attention_17_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488671616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489445824))))[name = string("attention_17_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_80 = constexpr_blockwise_shift_scale(data = attention_17_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489482752))))[name = string("constexpr_blockwise_shift_scale_80")]; + tensor attention_17_qkvproj_bias_0 = const()[name = string("attention_17_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489485120)))]; + tensor attention_17_qkvproj_strides_0 = const()[name = string("attention_17_qkvproj_strides_0"), val = tensor([1])]; + string attention_17_qkvproj_pad_type_0 = const()[name = string("attention_17_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_17_qkvproj_pad_0 = const()[name = string("attention_17_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_17_qkvproj_dilations_0 = const()[name = string("attention_17_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_17_qkvproj_groups_0 = const()[name = string("attention_17_qkvproj_groups_0"), val = int32(1)]; + tensor attention_17_qkvproj = conv(bias = attention_17_qkvproj_bias_0, dilations = attention_17_qkvproj_dilations_0, groups = attention_17_qkvproj_groups_0, pad = attention_17_qkvproj_pad_0, pad_type = attention_17_qkvproj_pad_type_0, strides = attention_17_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_80, x = block_17_attention_rmsnorm)[name = string("attention_17_qkvproj")]; + tensor attention_17_head_reshape_shape_0 = const()[name = string("attention_17_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_17_head_reshape = reshape(shape = attention_17_head_reshape_shape_0, x = attention_17_qkvproj)[name = string("attention_17_head_reshape")]; + tensor attention_17_head_transpose_perm_0 = const()[name = string("attention_17_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_17_split_qkv_heads_axis_0 = const()[name = string("attention_17_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_17_split_qkv_heads_split_sizes_0 = const()[name = string("attention_17_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_17_head_transpose = transpose(perm = attention_17_head_transpose_perm_0, x = attention_17_head_reshape)[name = string("transpose_14")]; + tensor attention_17_split_qkv_heads_0, tensor attention_17_split_qkv_heads_1, tensor attention_17_split_qkv_heads_2 = split(axis = attention_17_split_qkv_heads_axis_0, split_sizes = attention_17_split_qkv_heads_split_sizes_0, x = attention_17_head_transpose)[name = string("attention_17_split_qkv_heads")]; + tensor attention_17_q_rope_lhs_mult = mul(x = attention_17_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_17_q_rope_lhs_mult")]; + int32 attention_17_q_rotate_half_split_num_splits_0 = const()[name = string("attention_17_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_17_q_rotate_half_split_axis_0 = const()[name = string("attention_17_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_17_q_rotate_half_split_0, tensor attention_17_q_rotate_half_split_1 = split(axis = attention_17_q_rotate_half_split_axis_0, num_splits = attention_17_q_rotate_half_split_num_splits_0, x = attention_17_split_qkv_heads_0)[name = string("attention_17_q_rotate_half_split")]; + fp16 attention_17_q_rotate_half_neg_y_0 = const()[name = string("attention_17_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_17_q_rotate_half_neg = mul(x = attention_17_q_rotate_half_split_1, y = attention_17_q_rotate_half_neg_y_0)[name = string("attention_17_q_rotate_half_neg")]; + int32 attention_17_q_rotate_half_concat_axis_0 = const()[name = string("attention_17_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_17_q_rotate_half_concat_interleave_0 = const()[name = string("attention_17_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_17_q_rotate_half_concat = concat(axis = attention_17_q_rotate_half_concat_axis_0, interleave = attention_17_q_rotate_half_concat_interleave_0, values = (attention_17_q_rotate_half_neg, attention_17_q_rotate_half_split_0))[name = string("attention_17_q_rotate_half_concat")]; + tensor attention_17_q_rope_rhs_mult = mul(x = attention_17_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_17_q_rope_rhs_mult")]; + tensor attention_17_q_rope = add(x = attention_17_q_rope_lhs_mult, y = attention_17_q_rope_rhs_mult)[name = string("attention_17_q_rope")]; + tensor attention_17_k_rope_lhs_mult = mul(x = attention_17_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_17_k_rope_lhs_mult")]; + int32 attention_17_k_rotate_half_split_num_splits_0 = const()[name = string("attention_17_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_17_k_rotate_half_split_axis_0 = const()[name = string("attention_17_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_17_k_rotate_half_split_0, tensor attention_17_k_rotate_half_split_1 = split(axis = attention_17_k_rotate_half_split_axis_0, num_splits = attention_17_k_rotate_half_split_num_splits_0, x = attention_17_split_qkv_heads_1)[name = string("attention_17_k_rotate_half_split")]; + fp16 attention_17_k_rotate_half_neg_y_0 = const()[name = string("attention_17_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_17_k_rotate_half_neg = mul(x = attention_17_k_rotate_half_split_1, y = attention_17_k_rotate_half_neg_y_0)[name = string("attention_17_k_rotate_half_neg")]; + int32 attention_17_k_rotate_half_concat_axis_0 = const()[name = string("attention_17_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_17_k_rotate_half_concat_interleave_0 = const()[name = string("attention_17_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_17_k_rotate_half_concat = concat(axis = attention_17_k_rotate_half_concat_axis_0, interleave = attention_17_k_rotate_half_concat_interleave_0, values = (attention_17_k_rotate_half_neg, attention_17_k_rotate_half_split_0))[name = string("attention_17_k_rotate_half_concat")]; + tensor attention_17_k_rope_rhs_mult = mul(x = attention_17_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_17_k_rope_rhs_mult")]; + tensor attention_17_k_rope = add(x = attention_17_k_rope_lhs_mult, y = attention_17_k_rope_rhs_mult)[name = string("attention_17_k_rope")]; + int32 attention_17_q_splits_axis_0 = const()[name = string("attention_17_q_splits_axis_0"), val = int32(1)]; + int32 attention_17_q_splits_num_splits_0 = const()[name = string("attention_17_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_17_q_splits_0, tensor attention_17_q_splits_1 = split(axis = attention_17_q_splits_axis_0, num_splits = attention_17_q_splits_num_splits_0, x = attention_17_q_rope)[name = string("attention_17_q_splits")]; + tensor attention_17_update_begin_0_values0_0 = const()[name = string("attention_17_update_begin_0_values0_0"), val = tensor([17])]; + tensor attention_17_update_begin_0_values1_0 = const()[name = string("attention_17_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_17_update_begin_0_values3_0 = const()[name = string("attention_17_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_17_update_begin_0_axis_0 = const()[name = string("attention_17_update_begin_0_axis_0"), val = int32(0)]; + bool attention_17_update_begin_0_interleave_0 = const()[name = string("attention_17_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_17_update_begin_0 = concat(axis = attention_17_update_begin_0_axis_0, interleave = attention_17_update_begin_0_interleave_0, values = (attention_17_update_begin_0_values0_0, attention_17_update_begin_0_values1_0, query_pos1, attention_17_update_begin_0_values3_0))[name = string("attention_17_update_begin_0")]; + tensor attention_17_update_end_0_values0_0 = const()[name = string("attention_17_update_end_0_values0_0"), val = tensor([18])]; + tensor attention_17_update_end_0_values1_0 = const()[name = string("attention_17_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_17_update_end_0_values3_0 = const()[name = string("attention_17_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_17_update_end_0_axis_0 = const()[name = string("attention_17_update_end_0_axis_0"), val = int32(0)]; + bool attention_17_update_end_0_interleave_0 = const()[name = string("attention_17_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_17_update_end_0 = concat(axis = attention_17_update_end_0_axis_0, interleave = attention_17_update_end_0_interleave_0, values = (attention_17_update_end_0_values0_0, attention_17_update_end_0_values1_0, end_pos_0, attention_17_update_end_0_values3_0))[name = string("attention_17_update_end_0")]; + tensor attention_17_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_17_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_17_updated_key_cache_0 = slice_update(begin = attention_17_update_begin_0, end = attention_17_update_end_0, squeeze_mask = attention_17_updated_key_cache_0_squeeze_mask_0, update = attention_17_k_rope, x = coreml_update_state_32)[name = string("attention_17_updated_key_cache_0")]; + write_state(data = attention_17_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_34_write_state")]; + tensor coreml_update_state_34 = read_state(input = key_cache_state)[name = string("coreml_update_state_34")]; + tensor attention_17_key_cache_begin_0 = const()[name = string("attention_17_key_cache_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor attention_17_key_cache_end_0 = const()[name = string("attention_17_key_cache_end_0"), val = tensor([18, 2, 512, 64])]; + tensor attention_17_key_cache_squeeze_mask_0 = const()[name = string("attention_17_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_17_key_cache = slice_by_index(begin = attention_17_key_cache_begin_0, end = attention_17_key_cache_end_0, squeeze_mask = attention_17_key_cache_squeeze_mask_0, x = coreml_update_state_34)[name = string("attention_17_key_cache")]; + int32 attention_17_key_cache_head_axis_0 = const()[name = string("attention_17_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_17_key_cache_head_num_splits_0 = const()[name = string("attention_17_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_17_key_cache_head_0, tensor attention_17_key_cache_head_1 = split(axis = attention_17_key_cache_head_axis_0, num_splits = attention_17_key_cache_head_num_splits_0, x = attention_17_key_cache)[name = string("attention_17_key_cache_head")]; + tensor attention_17_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_17_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_17_updated_value_cache_0 = slice_update(begin = attention_17_update_begin_0, end = attention_17_update_end_0, squeeze_mask = attention_17_updated_value_cache_0_squeeze_mask_0, update = attention_17_split_qkv_heads_2, x = coreml_update_state_33)[name = string("attention_17_updated_value_cache_0")]; + write_state(data = attention_17_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_35_write_state")]; + tensor coreml_update_state_35 = read_state(input = value_cache_state)[name = string("coreml_update_state_35")]; + tensor attention_17_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_17_slice_current_layer_value_cache_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor attention_17_slice_current_layer_value_cache_end_0 = const()[name = string("attention_17_slice_current_layer_value_cache_end_0"), val = tensor([18, 2, 512, 64])]; + tensor attention_17_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_17_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_17_slice_current_layer_value_cache = slice_by_index(begin = attention_17_slice_current_layer_value_cache_begin_0, end = attention_17_slice_current_layer_value_cache_end_0, squeeze_mask = attention_17_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_35)[name = string("attention_17_slice_current_layer_value_cache")]; + int32 attention_17_slice_value_cache_heads_axis_0 = const()[name = string("attention_17_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_17_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_17_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_17_slice_value_cache_heads_0, tensor attention_17_slice_value_cache_heads_1 = split(axis = attention_17_slice_value_cache_heads_axis_0, num_splits = attention_17_slice_value_cache_heads_num_splits_0, x = attention_17_slice_current_layer_value_cache)[name = string("attention_17_slice_value_cache_heads")]; + bool attention_17_scores_0_transpose_y_0 = const()[name = string("attention_17_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_17_scores_0_transpose_x_0 = const()[name = string("attention_17_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_17_scores_0 = matmul(transpose_x = attention_17_scores_0_transpose_x_0, transpose_y = attention_17_scores_0_transpose_y_0, x = attention_17_key_cache_head_0, y = attention_17_q_splits_0)[name = string("attention_17_scores_0")]; + fp16 attention_17_scaled_scores_0_y_0 = const()[name = string("attention_17_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_17_scaled_scores_0 = mul(x = attention_17_scores_0, y = attention_17_scaled_scores_0_y_0)[name = string("attention_17_scaled_scores_0")]; + tensor attention_17_masked_scaled_scores_0 = add(x = attention_17_scaled_scores_0, y = transpose_0)[name = string("attention_17_masked_scaled_scores_0")]; + int32 softmax_34_axis_0 = const()[name = string("softmax_34_axis_0"), val = int32(-2)]; + tensor softmax_34 = softmax(axis = softmax_34_axis_0, x = attention_17_masked_scaled_scores_0)[name = string("softmax_34")]; + bool attention_17_attention_0_transpose_x_0 = const()[name = string("attention_17_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_17_attention_0_transpose_y_0 = const()[name = string("attention_17_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_17_attention_0 = matmul(transpose_x = attention_17_attention_0_transpose_x_0, transpose_y = attention_17_attention_0_transpose_y_0, x = softmax_34, y = attention_17_slice_value_cache_heads_0)[name = string("attention_17_attention_0")]; + bool attention_17_scores_1_transpose_y_0 = const()[name = string("attention_17_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_17_scores_1_transpose_x_0 = const()[name = string("attention_17_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_17_scores_1 = matmul(transpose_x = attention_17_scores_1_transpose_x_0, transpose_y = attention_17_scores_1_transpose_y_0, x = attention_17_key_cache_head_1, y = attention_17_q_splits_1)[name = string("attention_17_scores_1")]; + fp16 attention_17_scaled_scores_1_y_0 = const()[name = string("attention_17_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_17_scaled_scores_1 = mul(x = attention_17_scores_1, y = attention_17_scaled_scores_1_y_0)[name = string("attention_17_scaled_scores_1")]; + tensor attention_17_masked_scaled_scores_1 = add(x = attention_17_scaled_scores_1, y = transpose_0)[name = string("attention_17_masked_scaled_scores_1")]; + int32 softmax_35_axis_0 = const()[name = string("softmax_35_axis_0"), val = int32(-2)]; + tensor softmax_35 = softmax(axis = softmax_35_axis_0, x = attention_17_masked_scaled_scores_1)[name = string("softmax_35")]; + bool attention_17_attention_1_transpose_x_0 = const()[name = string("attention_17_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_17_attention_1_transpose_y_0 = const()[name = string("attention_17_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_17_attention_1 = matmul(transpose_x = attention_17_attention_1_transpose_x_0, transpose_y = attention_17_attention_1_transpose_y_0, x = softmax_35, y = attention_17_slice_value_cache_heads_1)[name = string("attention_17_attention_1")]; + int32 attention_17_concat_attention_all_heads_axis_0 = const()[name = string("attention_17_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_17_concat_attention_all_heads_interleave_0 = const()[name = string("attention_17_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_17_concat_attention_all_heads = concat(axis = attention_17_concat_attention_all_heads_axis_0, interleave = attention_17_concat_attention_all_heads_interleave_0, values = (attention_17_attention_0, attention_17_attention_1))[name = string("attention_17_concat_attention_all_heads")]; + tensor attention_17_channels_first_retransposed_perm_0 = const()[name = string("attention_17_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_17_reshaped_shape_0 = const()[name = string("attention_17_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_17_channels_first_retransposed = transpose(perm = attention_17_channels_first_retransposed_perm_0, x = attention_17_concat_attention_all_heads)[name = string("transpose_13")]; + tensor attention_17_reshaped = reshape(shape = attention_17_reshaped_shape_0, x = attention_17_channels_first_retransposed)[name = string("attention_17_reshaped")]; + tensor attention_17_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489487488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490089664))))[name = string("attention_17_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_81 = constexpr_blockwise_shift_scale(data = attention_17_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490118400))))[name = string("constexpr_blockwise_shift_scale_81")]; + tensor attention_17_outproj_strides_0 = const()[name = string("attention_17_outproj_strides_0"), val = tensor([1])]; + string attention_17_outproj_pad_type_0 = const()[name = string("attention_17_outproj_pad_type_0"), val = string("valid")]; + tensor attention_17_outproj_pad_0 = const()[name = string("attention_17_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_17_outproj_dilations_0 = const()[name = string("attention_17_outproj_dilations_0"), val = tensor([1])]; + int32 attention_17_outproj_groups_0 = const()[name = string("attention_17_outproj_groups_0"), val = int32(1)]; + tensor attention_17_outproj = conv(dilations = attention_17_outproj_dilations_0, groups = attention_17_outproj_groups_0, pad = attention_17_outproj_pad_0, pad_type = attention_17_outproj_pad_type_0, strides = attention_17_outproj_strides_0, weight = constexpr_blockwise_shift_scale_81, x = attention_17_reshaped)[name = string("attention_17_outproj")]; + tensor block_17_residual_1 = add(x = block_16_residual_2, y = attention_17_outproj)[name = string("block_17_residual_1")]; + tensor block_17_ffn_rmsnorm_abs = abs(x = block_17_residual_1)[name = string("block_17_ffn_rmsnorm_abs")]; + tensor block_17_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_17_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_17_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_17_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_17_ffn_rmsnorm_maxval = reduce_max(axes = block_17_ffn_rmsnorm_maxval_axes_0, keep_dims = block_17_ffn_rmsnorm_maxval_keep_dims_0, x = block_17_ffn_rmsnorm_abs)[name = string("block_17_ffn_rmsnorm_maxval")]; + fp16 block_17_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_17_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_17_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_17_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_17_ffn_rmsnorm_maxval_clipped = clip(alpha = block_17_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_17_ffn_rmsnorm_maxval_clipped_beta_0, x = block_17_ffn_rmsnorm_maxval)[name = string("block_17_ffn_rmsnorm_maxval_clipped")]; + tensor block_17_ffn_rmsnorm_scaled = real_div(x = block_17_residual_1, y = block_17_ffn_rmsnorm_maxval_clipped)[name = string("block_17_ffn_rmsnorm_scaled")]; + tensor block_17_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_17_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_17_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_17_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_17_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_17_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_17_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_17_ffn_rmsnorm_scaled)[name = string("block_17_ffn_rmsnorm_squared_sum")]; + fp16 block_17_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_17_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_17_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_17_ffn_rmsnorm_rsqrt_epsilon_0, x = block_17_ffn_rmsnorm_squared_sum)[name = string("block_17_ffn_rmsnorm_rsqrt")]; + fp16 block_17_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_17_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_17_ffn_rmsnorm_dim_scaled = mul(x = block_17_ffn_rmsnorm_scaled, y = block_17_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_17_ffn_rmsnorm_dim_scaled")]; + tensor block_17_ffn_rmsnorm_normalized = mul(x = block_17_ffn_rmsnorm_dim_scaled, y = block_17_ffn_rmsnorm_rsqrt)[name = string("block_17_ffn_rmsnorm_normalized")]; + tensor block_17_ffn_rmsnorm_y_0 = const()[name = string("block_17_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490120256)))]; + tensor block_17_ffn_rmsnorm = mul(x = block_17_ffn_rmsnorm_normalized, y = block_17_ffn_rmsnorm_y_0)[name = string("block_17_ffn_rmsnorm")]; + tensor block_17_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490122112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493390784))))[name = string("block_17_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_82 = constexpr_blockwise_shift_scale(data = block_17_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493546496))))[name = string("constexpr_blockwise_shift_scale_82")]; + tensor block_17_ffn_inproj_strides_0 = const()[name = string("block_17_ffn_inproj_strides_0"), val = tensor([1])]; + string block_17_ffn_inproj_pad_type_0 = const()[name = string("block_17_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_17_ffn_inproj_pad_0 = const()[name = string("block_17_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_17_ffn_inproj_dilations_0 = const()[name = string("block_17_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_17_ffn_inproj_groups_0 = const()[name = string("block_17_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_17_ffn_inproj = conv(dilations = block_17_ffn_inproj_dilations_0, groups = block_17_ffn_inproj_groups_0, pad = block_17_ffn_inproj_pad_0, pad_type = block_17_ffn_inproj_pad_type_0, strides = block_17_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_82, x = block_17_ffn_rmsnorm)[name = string("block_17_ffn_inproj")]; + tensor block_17_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493556288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496824960))))[name = string("block_17_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_83 = constexpr_blockwise_shift_scale(data = block_17_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496980672))))[name = string("constexpr_blockwise_shift_scale_83")]; + tensor block_17_ffn_g_strides_0 = const()[name = string("block_17_ffn_g_strides_0"), val = tensor([1])]; + string block_17_ffn_g_pad_type_0 = const()[name = string("block_17_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_17_ffn_g_pad_0 = const()[name = string("block_17_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_17_ffn_g_dilations_0 = const()[name = string("block_17_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_17_ffn_g_groups_0 = const()[name = string("block_17_ffn_g_groups_0"), val = int32(1)]; + tensor block_17_ffn_g = conv(dilations = block_17_ffn_g_dilations_0, groups = block_17_ffn_g_groups_0, pad = block_17_ffn_g_pad_0, pad_type = block_17_ffn_g_pad_type_0, strides = block_17_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_83, x = block_17_ffn_rmsnorm)[name = string("block_17_ffn_g")]; + tensor block_17_ffn_g_activation = silu(x = block_17_ffn_g)[name = string("block_17_ffn_g_activation")]; + tensor block_17_ffn_x_gated = mul(x = block_17_ffn_inproj, y = block_17_ffn_g_activation)[name = string("block_17_ffn_x_gated")]; + tensor block_17_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496990464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500259136))))[name = string("block_17_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_84 = constexpr_blockwise_shift_scale(data = block_17_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500287872))))[name = string("constexpr_blockwise_shift_scale_84")]; + tensor block_17_ffn_outproj_strides_0 = const()[name = string("block_17_ffn_outproj_strides_0"), val = tensor([1])]; + string block_17_ffn_outproj_pad_type_0 = const()[name = string("block_17_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_17_ffn_outproj_pad_0 = const()[name = string("block_17_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_17_ffn_outproj_dilations_0 = const()[name = string("block_17_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_17_ffn_outproj_groups_0 = const()[name = string("block_17_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_17_ffn_outproj = conv(dilations = block_17_ffn_outproj_dilations_0, groups = block_17_ffn_outproj_groups_0, pad = block_17_ffn_outproj_pad_0, pad_type = block_17_ffn_outproj_pad_type_0, strides = block_17_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_84, x = block_17_ffn_x_gated)[name = string("block_17_ffn_outproj")]; + tensor block_17_residual_2 = add(x = block_17_ffn_outproj, y = block_17_residual_1)[name = string("block_17_residual_2")]; + tensor block_18_attention_rmsnorm_abs = abs(x = block_17_residual_2)[name = string("block_18_attention_rmsnorm_abs")]; + tensor block_18_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_18_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_18_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_18_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_18_attention_rmsnorm_maxval = reduce_max(axes = block_18_attention_rmsnorm_maxval_axes_0, keep_dims = block_18_attention_rmsnorm_maxval_keep_dims_0, x = block_18_attention_rmsnorm_abs)[name = string("block_18_attention_rmsnorm_maxval")]; + fp16 block_18_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_18_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_18_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_18_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_18_attention_rmsnorm_maxval_clipped = clip(alpha = block_18_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_18_attention_rmsnorm_maxval_clipped_beta_0, x = block_18_attention_rmsnorm_maxval)[name = string("block_18_attention_rmsnorm_maxval_clipped")]; + tensor block_18_attention_rmsnorm_scaled = real_div(x = block_17_residual_2, y = block_18_attention_rmsnorm_maxval_clipped)[name = string("block_18_attention_rmsnorm_scaled")]; + tensor block_18_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_18_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_18_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_18_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_18_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_18_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_18_attention_rmsnorm_squared_sum_keep_dims_0, x = block_18_attention_rmsnorm_scaled)[name = string("block_18_attention_rmsnorm_squared_sum")]; + fp16 block_18_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_18_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_18_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_18_attention_rmsnorm_rsqrt_epsilon_0, x = block_18_attention_rmsnorm_squared_sum)[name = string("block_18_attention_rmsnorm_rsqrt")]; + fp16 block_18_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_18_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_18_attention_rmsnorm_dim_scaled = mul(x = block_18_attention_rmsnorm_scaled, y = block_18_attention_rmsnorm_dim_scaled_y_0)[name = string("block_18_attention_rmsnorm_dim_scaled")]; + tensor block_18_attention_rmsnorm_normalized = mul(x = block_18_attention_rmsnorm_dim_scaled, y = block_18_attention_rmsnorm_rsqrt)[name = string("block_18_attention_rmsnorm_normalized")]; + tensor block_18_attention_rmsnorm_y_0 = const()[name = string("block_18_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500289728)))]; + tensor block_18_attention_rmsnorm = mul(x = block_18_attention_rmsnorm_normalized, y = block_18_attention_rmsnorm_y_0)[name = string("block_18_attention_rmsnorm")]; + tensor attention_18_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500291584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501065792))))[name = string("attention_18_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_85 = constexpr_blockwise_shift_scale(data = attention_18_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501102720))))[name = string("constexpr_blockwise_shift_scale_85")]; + tensor attention_18_qkvproj_bias_0 = const()[name = string("attention_18_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501105088)))]; + tensor attention_18_qkvproj_strides_0 = const()[name = string("attention_18_qkvproj_strides_0"), val = tensor([1])]; + string attention_18_qkvproj_pad_type_0 = const()[name = string("attention_18_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_18_qkvproj_pad_0 = const()[name = string("attention_18_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_18_qkvproj_dilations_0 = const()[name = string("attention_18_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_18_qkvproj_groups_0 = const()[name = string("attention_18_qkvproj_groups_0"), val = int32(1)]; + tensor attention_18_qkvproj = conv(bias = attention_18_qkvproj_bias_0, dilations = attention_18_qkvproj_dilations_0, groups = attention_18_qkvproj_groups_0, pad = attention_18_qkvproj_pad_0, pad_type = attention_18_qkvproj_pad_type_0, strides = attention_18_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_85, x = block_18_attention_rmsnorm)[name = string("attention_18_qkvproj")]; + tensor attention_18_head_reshape_shape_0 = const()[name = string("attention_18_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_18_head_reshape = reshape(shape = attention_18_head_reshape_shape_0, x = attention_18_qkvproj)[name = string("attention_18_head_reshape")]; + tensor attention_18_head_transpose_perm_0 = const()[name = string("attention_18_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_18_split_qkv_heads_axis_0 = const()[name = string("attention_18_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_18_split_qkv_heads_split_sizes_0 = const()[name = string("attention_18_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_18_head_transpose = transpose(perm = attention_18_head_transpose_perm_0, x = attention_18_head_reshape)[name = string("transpose_12")]; + tensor attention_18_split_qkv_heads_0, tensor attention_18_split_qkv_heads_1, tensor attention_18_split_qkv_heads_2 = split(axis = attention_18_split_qkv_heads_axis_0, split_sizes = attention_18_split_qkv_heads_split_sizes_0, x = attention_18_head_transpose)[name = string("attention_18_split_qkv_heads")]; + tensor attention_18_q_rope_lhs_mult = mul(x = attention_18_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_18_q_rope_lhs_mult")]; + int32 attention_18_q_rotate_half_split_num_splits_0 = const()[name = string("attention_18_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_18_q_rotate_half_split_axis_0 = const()[name = string("attention_18_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_18_q_rotate_half_split_0, tensor attention_18_q_rotate_half_split_1 = split(axis = attention_18_q_rotate_half_split_axis_0, num_splits = attention_18_q_rotate_half_split_num_splits_0, x = attention_18_split_qkv_heads_0)[name = string("attention_18_q_rotate_half_split")]; + fp16 attention_18_q_rotate_half_neg_y_0 = const()[name = string("attention_18_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_18_q_rotate_half_neg = mul(x = attention_18_q_rotate_half_split_1, y = attention_18_q_rotate_half_neg_y_0)[name = string("attention_18_q_rotate_half_neg")]; + int32 attention_18_q_rotate_half_concat_axis_0 = const()[name = string("attention_18_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_18_q_rotate_half_concat_interleave_0 = const()[name = string("attention_18_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_18_q_rotate_half_concat = concat(axis = attention_18_q_rotate_half_concat_axis_0, interleave = attention_18_q_rotate_half_concat_interleave_0, values = (attention_18_q_rotate_half_neg, attention_18_q_rotate_half_split_0))[name = string("attention_18_q_rotate_half_concat")]; + tensor attention_18_q_rope_rhs_mult = mul(x = attention_18_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_18_q_rope_rhs_mult")]; + tensor attention_18_q_rope = add(x = attention_18_q_rope_lhs_mult, y = attention_18_q_rope_rhs_mult)[name = string("attention_18_q_rope")]; + tensor attention_18_k_rope_lhs_mult = mul(x = attention_18_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_18_k_rope_lhs_mult")]; + int32 attention_18_k_rotate_half_split_num_splits_0 = const()[name = string("attention_18_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_18_k_rotate_half_split_axis_0 = const()[name = string("attention_18_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_18_k_rotate_half_split_0, tensor attention_18_k_rotate_half_split_1 = split(axis = attention_18_k_rotate_half_split_axis_0, num_splits = attention_18_k_rotate_half_split_num_splits_0, x = attention_18_split_qkv_heads_1)[name = string("attention_18_k_rotate_half_split")]; + fp16 attention_18_k_rotate_half_neg_y_0 = const()[name = string("attention_18_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_18_k_rotate_half_neg = mul(x = attention_18_k_rotate_half_split_1, y = attention_18_k_rotate_half_neg_y_0)[name = string("attention_18_k_rotate_half_neg")]; + int32 attention_18_k_rotate_half_concat_axis_0 = const()[name = string("attention_18_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_18_k_rotate_half_concat_interleave_0 = const()[name = string("attention_18_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_18_k_rotate_half_concat = concat(axis = attention_18_k_rotate_half_concat_axis_0, interleave = attention_18_k_rotate_half_concat_interleave_0, values = (attention_18_k_rotate_half_neg, attention_18_k_rotate_half_split_0))[name = string("attention_18_k_rotate_half_concat")]; + tensor attention_18_k_rope_rhs_mult = mul(x = attention_18_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_18_k_rope_rhs_mult")]; + tensor attention_18_k_rope = add(x = attention_18_k_rope_lhs_mult, y = attention_18_k_rope_rhs_mult)[name = string("attention_18_k_rope")]; + int32 attention_18_q_splits_axis_0 = const()[name = string("attention_18_q_splits_axis_0"), val = int32(1)]; + int32 attention_18_q_splits_num_splits_0 = const()[name = string("attention_18_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_18_q_splits_0, tensor attention_18_q_splits_1 = split(axis = attention_18_q_splits_axis_0, num_splits = attention_18_q_splits_num_splits_0, x = attention_18_q_rope)[name = string("attention_18_q_splits")]; + tensor attention_18_update_begin_0_values0_0 = const()[name = string("attention_18_update_begin_0_values0_0"), val = tensor([18])]; + tensor attention_18_update_begin_0_values1_0 = const()[name = string("attention_18_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_18_update_begin_0_values3_0 = const()[name = string("attention_18_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_18_update_begin_0_axis_0 = const()[name = string("attention_18_update_begin_0_axis_0"), val = int32(0)]; + bool attention_18_update_begin_0_interleave_0 = const()[name = string("attention_18_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_18_update_begin_0 = concat(axis = attention_18_update_begin_0_axis_0, interleave = attention_18_update_begin_0_interleave_0, values = (attention_18_update_begin_0_values0_0, attention_18_update_begin_0_values1_0, query_pos1, attention_18_update_begin_0_values3_0))[name = string("attention_18_update_begin_0")]; + tensor attention_18_update_end_0_values0_0 = const()[name = string("attention_18_update_end_0_values0_0"), val = tensor([19])]; + tensor attention_18_update_end_0_values1_0 = const()[name = string("attention_18_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_18_update_end_0_values3_0 = const()[name = string("attention_18_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_18_update_end_0_axis_0 = const()[name = string("attention_18_update_end_0_axis_0"), val = int32(0)]; + bool attention_18_update_end_0_interleave_0 = const()[name = string("attention_18_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_18_update_end_0 = concat(axis = attention_18_update_end_0_axis_0, interleave = attention_18_update_end_0_interleave_0, values = (attention_18_update_end_0_values0_0, attention_18_update_end_0_values1_0, end_pos_0, attention_18_update_end_0_values3_0))[name = string("attention_18_update_end_0")]; + tensor attention_18_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_18_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_18_updated_key_cache_0 = slice_update(begin = attention_18_update_begin_0, end = attention_18_update_end_0, squeeze_mask = attention_18_updated_key_cache_0_squeeze_mask_0, update = attention_18_k_rope, x = coreml_update_state_34)[name = string("attention_18_updated_key_cache_0")]; + write_state(data = attention_18_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_36_write_state")]; + tensor coreml_update_state_36 = read_state(input = key_cache_state)[name = string("coreml_update_state_36")]; + tensor attention_18_key_cache_begin_0 = const()[name = string("attention_18_key_cache_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor attention_18_key_cache_end_0 = const()[name = string("attention_18_key_cache_end_0"), val = tensor([19, 2, 512, 64])]; + tensor attention_18_key_cache_squeeze_mask_0 = const()[name = string("attention_18_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_18_key_cache = slice_by_index(begin = attention_18_key_cache_begin_0, end = attention_18_key_cache_end_0, squeeze_mask = attention_18_key_cache_squeeze_mask_0, x = coreml_update_state_36)[name = string("attention_18_key_cache")]; + int32 attention_18_key_cache_head_axis_0 = const()[name = string("attention_18_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_18_key_cache_head_num_splits_0 = const()[name = string("attention_18_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_18_key_cache_head_0, tensor attention_18_key_cache_head_1 = split(axis = attention_18_key_cache_head_axis_0, num_splits = attention_18_key_cache_head_num_splits_0, x = attention_18_key_cache)[name = string("attention_18_key_cache_head")]; + tensor attention_18_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_18_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_18_updated_value_cache_0 = slice_update(begin = attention_18_update_begin_0, end = attention_18_update_end_0, squeeze_mask = attention_18_updated_value_cache_0_squeeze_mask_0, update = attention_18_split_qkv_heads_2, x = coreml_update_state_35)[name = string("attention_18_updated_value_cache_0")]; + write_state(data = attention_18_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_37_write_state")]; + tensor coreml_update_state_37 = read_state(input = value_cache_state)[name = string("coreml_update_state_37")]; + tensor attention_18_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_18_slice_current_layer_value_cache_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor attention_18_slice_current_layer_value_cache_end_0 = const()[name = string("attention_18_slice_current_layer_value_cache_end_0"), val = tensor([19, 2, 512, 64])]; + tensor attention_18_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_18_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_18_slice_current_layer_value_cache = slice_by_index(begin = attention_18_slice_current_layer_value_cache_begin_0, end = attention_18_slice_current_layer_value_cache_end_0, squeeze_mask = attention_18_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_37)[name = string("attention_18_slice_current_layer_value_cache")]; + int32 attention_18_slice_value_cache_heads_axis_0 = const()[name = string("attention_18_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_18_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_18_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_18_slice_value_cache_heads_0, tensor attention_18_slice_value_cache_heads_1 = split(axis = attention_18_slice_value_cache_heads_axis_0, num_splits = attention_18_slice_value_cache_heads_num_splits_0, x = attention_18_slice_current_layer_value_cache)[name = string("attention_18_slice_value_cache_heads")]; + bool attention_18_scores_0_transpose_y_0 = const()[name = string("attention_18_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_18_scores_0_transpose_x_0 = const()[name = string("attention_18_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_18_scores_0 = matmul(transpose_x = attention_18_scores_0_transpose_x_0, transpose_y = attention_18_scores_0_transpose_y_0, x = attention_18_key_cache_head_0, y = attention_18_q_splits_0)[name = string("attention_18_scores_0")]; + fp16 attention_18_scaled_scores_0_y_0 = const()[name = string("attention_18_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_18_scaled_scores_0 = mul(x = attention_18_scores_0, y = attention_18_scaled_scores_0_y_0)[name = string("attention_18_scaled_scores_0")]; + tensor attention_18_masked_scaled_scores_0 = add(x = attention_18_scaled_scores_0, y = transpose_0)[name = string("attention_18_masked_scaled_scores_0")]; + int32 softmax_36_axis_0 = const()[name = string("softmax_36_axis_0"), val = int32(-2)]; + tensor softmax_36 = softmax(axis = softmax_36_axis_0, x = attention_18_masked_scaled_scores_0)[name = string("softmax_36")]; + bool attention_18_attention_0_transpose_x_0 = const()[name = string("attention_18_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_18_attention_0_transpose_y_0 = const()[name = string("attention_18_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_18_attention_0 = matmul(transpose_x = attention_18_attention_0_transpose_x_0, transpose_y = attention_18_attention_0_transpose_y_0, x = softmax_36, y = attention_18_slice_value_cache_heads_0)[name = string("attention_18_attention_0")]; + bool attention_18_scores_1_transpose_y_0 = const()[name = string("attention_18_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_18_scores_1_transpose_x_0 = const()[name = string("attention_18_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_18_scores_1 = matmul(transpose_x = attention_18_scores_1_transpose_x_0, transpose_y = attention_18_scores_1_transpose_y_0, x = attention_18_key_cache_head_1, y = attention_18_q_splits_1)[name = string("attention_18_scores_1")]; + fp16 attention_18_scaled_scores_1_y_0 = const()[name = string("attention_18_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_18_scaled_scores_1 = mul(x = attention_18_scores_1, y = attention_18_scaled_scores_1_y_0)[name = string("attention_18_scaled_scores_1")]; + tensor attention_18_masked_scaled_scores_1 = add(x = attention_18_scaled_scores_1, y = transpose_0)[name = string("attention_18_masked_scaled_scores_1")]; + int32 softmax_37_axis_0 = const()[name = string("softmax_37_axis_0"), val = int32(-2)]; + tensor softmax_37 = softmax(axis = softmax_37_axis_0, x = attention_18_masked_scaled_scores_1)[name = string("softmax_37")]; + bool attention_18_attention_1_transpose_x_0 = const()[name = string("attention_18_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_18_attention_1_transpose_y_0 = const()[name = string("attention_18_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_18_attention_1 = matmul(transpose_x = attention_18_attention_1_transpose_x_0, transpose_y = attention_18_attention_1_transpose_y_0, x = softmax_37, y = attention_18_slice_value_cache_heads_1)[name = string("attention_18_attention_1")]; + int32 attention_18_concat_attention_all_heads_axis_0 = const()[name = string("attention_18_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_18_concat_attention_all_heads_interleave_0 = const()[name = string("attention_18_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_18_concat_attention_all_heads = concat(axis = attention_18_concat_attention_all_heads_axis_0, interleave = attention_18_concat_attention_all_heads_interleave_0, values = (attention_18_attention_0, attention_18_attention_1))[name = string("attention_18_concat_attention_all_heads")]; + tensor attention_18_channels_first_retransposed_perm_0 = const()[name = string("attention_18_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_18_reshaped_shape_0 = const()[name = string("attention_18_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_18_channels_first_retransposed = transpose(perm = attention_18_channels_first_retransposed_perm_0, x = attention_18_concat_attention_all_heads)[name = string("transpose_11")]; + tensor attention_18_reshaped = reshape(shape = attention_18_reshaped_shape_0, x = attention_18_channels_first_retransposed)[name = string("attention_18_reshaped")]; + tensor attention_18_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501107456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501709632))))[name = string("attention_18_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_86 = constexpr_blockwise_shift_scale(data = attention_18_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501738368))))[name = string("constexpr_blockwise_shift_scale_86")]; + tensor attention_18_outproj_strides_0 = const()[name = string("attention_18_outproj_strides_0"), val = tensor([1])]; + string attention_18_outproj_pad_type_0 = const()[name = string("attention_18_outproj_pad_type_0"), val = string("valid")]; + tensor attention_18_outproj_pad_0 = const()[name = string("attention_18_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_18_outproj_dilations_0 = const()[name = string("attention_18_outproj_dilations_0"), val = tensor([1])]; + int32 attention_18_outproj_groups_0 = const()[name = string("attention_18_outproj_groups_0"), val = int32(1)]; + tensor attention_18_outproj = conv(dilations = attention_18_outproj_dilations_0, groups = attention_18_outproj_groups_0, pad = attention_18_outproj_pad_0, pad_type = attention_18_outproj_pad_type_0, strides = attention_18_outproj_strides_0, weight = constexpr_blockwise_shift_scale_86, x = attention_18_reshaped)[name = string("attention_18_outproj")]; + tensor block_18_residual_1 = add(x = block_17_residual_2, y = attention_18_outproj)[name = string("block_18_residual_1")]; + tensor block_18_ffn_rmsnorm_abs = abs(x = block_18_residual_1)[name = string("block_18_ffn_rmsnorm_abs")]; + tensor block_18_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_18_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_18_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_18_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_18_ffn_rmsnorm_maxval = reduce_max(axes = block_18_ffn_rmsnorm_maxval_axes_0, keep_dims = block_18_ffn_rmsnorm_maxval_keep_dims_0, x = block_18_ffn_rmsnorm_abs)[name = string("block_18_ffn_rmsnorm_maxval")]; + fp16 block_18_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_18_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_18_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_18_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_18_ffn_rmsnorm_maxval_clipped = clip(alpha = block_18_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_18_ffn_rmsnorm_maxval_clipped_beta_0, x = block_18_ffn_rmsnorm_maxval)[name = string("block_18_ffn_rmsnorm_maxval_clipped")]; + tensor block_18_ffn_rmsnorm_scaled = real_div(x = block_18_residual_1, y = block_18_ffn_rmsnorm_maxval_clipped)[name = string("block_18_ffn_rmsnorm_scaled")]; + tensor block_18_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_18_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_18_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_18_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_18_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_18_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_18_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_18_ffn_rmsnorm_scaled)[name = string("block_18_ffn_rmsnorm_squared_sum")]; + fp16 block_18_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_18_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_18_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_18_ffn_rmsnorm_rsqrt_epsilon_0, x = block_18_ffn_rmsnorm_squared_sum)[name = string("block_18_ffn_rmsnorm_rsqrt")]; + fp16 block_18_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_18_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_18_ffn_rmsnorm_dim_scaled = mul(x = block_18_ffn_rmsnorm_scaled, y = block_18_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_18_ffn_rmsnorm_dim_scaled")]; + tensor block_18_ffn_rmsnorm_normalized = mul(x = block_18_ffn_rmsnorm_dim_scaled, y = block_18_ffn_rmsnorm_rsqrt)[name = string("block_18_ffn_rmsnorm_normalized")]; + tensor block_18_ffn_rmsnorm_y_0 = const()[name = string("block_18_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501740224)))]; + tensor block_18_ffn_rmsnorm = mul(x = block_18_ffn_rmsnorm_normalized, y = block_18_ffn_rmsnorm_y_0)[name = string("block_18_ffn_rmsnorm")]; + tensor block_18_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501742080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505010752))))[name = string("block_18_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_87 = constexpr_blockwise_shift_scale(data = block_18_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505166464))))[name = string("constexpr_blockwise_shift_scale_87")]; + tensor block_18_ffn_inproj_strides_0 = const()[name = string("block_18_ffn_inproj_strides_0"), val = tensor([1])]; + string block_18_ffn_inproj_pad_type_0 = const()[name = string("block_18_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_18_ffn_inproj_pad_0 = const()[name = string("block_18_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_18_ffn_inproj_dilations_0 = const()[name = string("block_18_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_18_ffn_inproj_groups_0 = const()[name = string("block_18_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_18_ffn_inproj = conv(dilations = block_18_ffn_inproj_dilations_0, groups = block_18_ffn_inproj_groups_0, pad = block_18_ffn_inproj_pad_0, pad_type = block_18_ffn_inproj_pad_type_0, strides = block_18_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_87, x = block_18_ffn_rmsnorm)[name = string("block_18_ffn_inproj")]; + tensor block_18_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505176256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508444928))))[name = string("block_18_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_88 = constexpr_blockwise_shift_scale(data = block_18_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508600640))))[name = string("constexpr_blockwise_shift_scale_88")]; + tensor block_18_ffn_g_strides_0 = const()[name = string("block_18_ffn_g_strides_0"), val = tensor([1])]; + string block_18_ffn_g_pad_type_0 = const()[name = string("block_18_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_18_ffn_g_pad_0 = const()[name = string("block_18_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_18_ffn_g_dilations_0 = const()[name = string("block_18_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_18_ffn_g_groups_0 = const()[name = string("block_18_ffn_g_groups_0"), val = int32(1)]; + tensor block_18_ffn_g = conv(dilations = block_18_ffn_g_dilations_0, groups = block_18_ffn_g_groups_0, pad = block_18_ffn_g_pad_0, pad_type = block_18_ffn_g_pad_type_0, strides = block_18_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_88, x = block_18_ffn_rmsnorm)[name = string("block_18_ffn_g")]; + tensor block_18_ffn_g_activation = silu(x = block_18_ffn_g)[name = string("block_18_ffn_g_activation")]; + tensor block_18_ffn_x_gated = mul(x = block_18_ffn_inproj, y = block_18_ffn_g_activation)[name = string("block_18_ffn_x_gated")]; + tensor block_18_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508610432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511879104))))[name = string("block_18_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_89 = constexpr_blockwise_shift_scale(data = block_18_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511907840))))[name = string("constexpr_blockwise_shift_scale_89")]; + tensor block_18_ffn_outproj_strides_0 = const()[name = string("block_18_ffn_outproj_strides_0"), val = tensor([1])]; + string block_18_ffn_outproj_pad_type_0 = const()[name = string("block_18_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_18_ffn_outproj_pad_0 = const()[name = string("block_18_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_18_ffn_outproj_dilations_0 = const()[name = string("block_18_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_18_ffn_outproj_groups_0 = const()[name = string("block_18_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_18_ffn_outproj = conv(dilations = block_18_ffn_outproj_dilations_0, groups = block_18_ffn_outproj_groups_0, pad = block_18_ffn_outproj_pad_0, pad_type = block_18_ffn_outproj_pad_type_0, strides = block_18_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_89, x = block_18_ffn_x_gated)[name = string("block_18_ffn_outproj")]; + tensor block_18_residual_2 = add(x = block_18_ffn_outproj, y = block_18_residual_1)[name = string("block_18_residual_2")]; + tensor block_19_attention_rmsnorm_abs = abs(x = block_18_residual_2)[name = string("block_19_attention_rmsnorm_abs")]; + tensor block_19_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_19_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_19_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_19_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_19_attention_rmsnorm_maxval = reduce_max(axes = block_19_attention_rmsnorm_maxval_axes_0, keep_dims = block_19_attention_rmsnorm_maxval_keep_dims_0, x = block_19_attention_rmsnorm_abs)[name = string("block_19_attention_rmsnorm_maxval")]; + fp16 block_19_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_19_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_19_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_19_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_19_attention_rmsnorm_maxval_clipped = clip(alpha = block_19_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_19_attention_rmsnorm_maxval_clipped_beta_0, x = block_19_attention_rmsnorm_maxval)[name = string("block_19_attention_rmsnorm_maxval_clipped")]; + tensor block_19_attention_rmsnorm_scaled = real_div(x = block_18_residual_2, y = block_19_attention_rmsnorm_maxval_clipped)[name = string("block_19_attention_rmsnorm_scaled")]; + tensor block_19_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_19_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_19_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_19_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_19_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_19_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_19_attention_rmsnorm_squared_sum_keep_dims_0, x = block_19_attention_rmsnorm_scaled)[name = string("block_19_attention_rmsnorm_squared_sum")]; + fp16 block_19_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_19_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_19_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_19_attention_rmsnorm_rsqrt_epsilon_0, x = block_19_attention_rmsnorm_squared_sum)[name = string("block_19_attention_rmsnorm_rsqrt")]; + fp16 block_19_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_19_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_19_attention_rmsnorm_dim_scaled = mul(x = block_19_attention_rmsnorm_scaled, y = block_19_attention_rmsnorm_dim_scaled_y_0)[name = string("block_19_attention_rmsnorm_dim_scaled")]; + tensor block_19_attention_rmsnorm_normalized = mul(x = block_19_attention_rmsnorm_dim_scaled, y = block_19_attention_rmsnorm_rsqrt)[name = string("block_19_attention_rmsnorm_normalized")]; + tensor block_19_attention_rmsnorm_y_0 = const()[name = string("block_19_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511909696)))]; + tensor block_19_attention_rmsnorm = mul(x = block_19_attention_rmsnorm_normalized, y = block_19_attention_rmsnorm_y_0)[name = string("block_19_attention_rmsnorm")]; + tensor attention_19_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511911552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512685760))))[name = string("attention_19_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_90 = constexpr_blockwise_shift_scale(data = attention_19_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512722688))))[name = string("constexpr_blockwise_shift_scale_90")]; + tensor attention_19_qkvproj_bias_0 = const()[name = string("attention_19_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512725056)))]; + tensor attention_19_qkvproj_strides_0 = const()[name = string("attention_19_qkvproj_strides_0"), val = tensor([1])]; + string attention_19_qkvproj_pad_type_0 = const()[name = string("attention_19_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_19_qkvproj_pad_0 = const()[name = string("attention_19_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_19_qkvproj_dilations_0 = const()[name = string("attention_19_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_19_qkvproj_groups_0 = const()[name = string("attention_19_qkvproj_groups_0"), val = int32(1)]; + tensor attention_19_qkvproj = conv(bias = attention_19_qkvproj_bias_0, dilations = attention_19_qkvproj_dilations_0, groups = attention_19_qkvproj_groups_0, pad = attention_19_qkvproj_pad_0, pad_type = attention_19_qkvproj_pad_type_0, strides = attention_19_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_90, x = block_19_attention_rmsnorm)[name = string("attention_19_qkvproj")]; + tensor attention_19_head_reshape_shape_0 = const()[name = string("attention_19_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_19_head_reshape = reshape(shape = attention_19_head_reshape_shape_0, x = attention_19_qkvproj)[name = string("attention_19_head_reshape")]; + tensor attention_19_head_transpose_perm_0 = const()[name = string("attention_19_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_19_split_qkv_heads_axis_0 = const()[name = string("attention_19_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_19_split_qkv_heads_split_sizes_0 = const()[name = string("attention_19_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_19_head_transpose = transpose(perm = attention_19_head_transpose_perm_0, x = attention_19_head_reshape)[name = string("transpose_10")]; + tensor attention_19_split_qkv_heads_0, tensor attention_19_split_qkv_heads_1, tensor attention_19_split_qkv_heads_2 = split(axis = attention_19_split_qkv_heads_axis_0, split_sizes = attention_19_split_qkv_heads_split_sizes_0, x = attention_19_head_transpose)[name = string("attention_19_split_qkv_heads")]; + tensor attention_19_q_rope_lhs_mult = mul(x = attention_19_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_19_q_rope_lhs_mult")]; + int32 attention_19_q_rotate_half_split_num_splits_0 = const()[name = string("attention_19_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_19_q_rotate_half_split_axis_0 = const()[name = string("attention_19_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_19_q_rotate_half_split_0, tensor attention_19_q_rotate_half_split_1 = split(axis = attention_19_q_rotate_half_split_axis_0, num_splits = attention_19_q_rotate_half_split_num_splits_0, x = attention_19_split_qkv_heads_0)[name = string("attention_19_q_rotate_half_split")]; + fp16 attention_19_q_rotate_half_neg_y_0 = const()[name = string("attention_19_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_19_q_rotate_half_neg = mul(x = attention_19_q_rotate_half_split_1, y = attention_19_q_rotate_half_neg_y_0)[name = string("attention_19_q_rotate_half_neg")]; + int32 attention_19_q_rotate_half_concat_axis_0 = const()[name = string("attention_19_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_19_q_rotate_half_concat_interleave_0 = const()[name = string("attention_19_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_19_q_rotate_half_concat = concat(axis = attention_19_q_rotate_half_concat_axis_0, interleave = attention_19_q_rotate_half_concat_interleave_0, values = (attention_19_q_rotate_half_neg, attention_19_q_rotate_half_split_0))[name = string("attention_19_q_rotate_half_concat")]; + tensor attention_19_q_rope_rhs_mult = mul(x = attention_19_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_19_q_rope_rhs_mult")]; + tensor attention_19_q_rope = add(x = attention_19_q_rope_lhs_mult, y = attention_19_q_rope_rhs_mult)[name = string("attention_19_q_rope")]; + tensor attention_19_k_rope_lhs_mult = mul(x = attention_19_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_19_k_rope_lhs_mult")]; + int32 attention_19_k_rotate_half_split_num_splits_0 = const()[name = string("attention_19_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_19_k_rotate_half_split_axis_0 = const()[name = string("attention_19_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_19_k_rotate_half_split_0, tensor attention_19_k_rotate_half_split_1 = split(axis = attention_19_k_rotate_half_split_axis_0, num_splits = attention_19_k_rotate_half_split_num_splits_0, x = attention_19_split_qkv_heads_1)[name = string("attention_19_k_rotate_half_split")]; + fp16 attention_19_k_rotate_half_neg_y_0 = const()[name = string("attention_19_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_19_k_rotate_half_neg = mul(x = attention_19_k_rotate_half_split_1, y = attention_19_k_rotate_half_neg_y_0)[name = string("attention_19_k_rotate_half_neg")]; + int32 attention_19_k_rotate_half_concat_axis_0 = const()[name = string("attention_19_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_19_k_rotate_half_concat_interleave_0 = const()[name = string("attention_19_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_19_k_rotate_half_concat = concat(axis = attention_19_k_rotate_half_concat_axis_0, interleave = attention_19_k_rotate_half_concat_interleave_0, values = (attention_19_k_rotate_half_neg, attention_19_k_rotate_half_split_0))[name = string("attention_19_k_rotate_half_concat")]; + tensor attention_19_k_rope_rhs_mult = mul(x = attention_19_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_19_k_rope_rhs_mult")]; + tensor attention_19_k_rope = add(x = attention_19_k_rope_lhs_mult, y = attention_19_k_rope_rhs_mult)[name = string("attention_19_k_rope")]; + int32 attention_19_q_splits_axis_0 = const()[name = string("attention_19_q_splits_axis_0"), val = int32(1)]; + int32 attention_19_q_splits_num_splits_0 = const()[name = string("attention_19_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_19_q_splits_0, tensor attention_19_q_splits_1 = split(axis = attention_19_q_splits_axis_0, num_splits = attention_19_q_splits_num_splits_0, x = attention_19_q_rope)[name = string("attention_19_q_splits")]; + tensor attention_19_update_begin_0_values0_0 = const()[name = string("attention_19_update_begin_0_values0_0"), val = tensor([19])]; + tensor attention_19_update_begin_0_values1_0 = const()[name = string("attention_19_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_19_update_begin_0_values3_0 = const()[name = string("attention_19_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_19_update_begin_0_axis_0 = const()[name = string("attention_19_update_begin_0_axis_0"), val = int32(0)]; + bool attention_19_update_begin_0_interleave_0 = const()[name = string("attention_19_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_19_update_begin_0 = concat(axis = attention_19_update_begin_0_axis_0, interleave = attention_19_update_begin_0_interleave_0, values = (attention_19_update_begin_0_values0_0, attention_19_update_begin_0_values1_0, query_pos1, attention_19_update_begin_0_values3_0))[name = string("attention_19_update_begin_0")]; + tensor attention_19_update_end_0_values0_0 = const()[name = string("attention_19_update_end_0_values0_0"), val = tensor([20])]; + tensor attention_19_update_end_0_values1_0 = const()[name = string("attention_19_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_19_update_end_0_values3_0 = const()[name = string("attention_19_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_19_update_end_0_axis_0 = const()[name = string("attention_19_update_end_0_axis_0"), val = int32(0)]; + bool attention_19_update_end_0_interleave_0 = const()[name = string("attention_19_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_19_update_end_0 = concat(axis = attention_19_update_end_0_axis_0, interleave = attention_19_update_end_0_interleave_0, values = (attention_19_update_end_0_values0_0, attention_19_update_end_0_values1_0, end_pos_0, attention_19_update_end_0_values3_0))[name = string("attention_19_update_end_0")]; + tensor attention_19_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_19_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_19_updated_key_cache_0 = slice_update(begin = attention_19_update_begin_0, end = attention_19_update_end_0, squeeze_mask = attention_19_updated_key_cache_0_squeeze_mask_0, update = attention_19_k_rope, x = coreml_update_state_36)[name = string("attention_19_updated_key_cache_0")]; + write_state(data = attention_19_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_38_write_state")]; + tensor coreml_update_state_38 = read_state(input = key_cache_state)[name = string("coreml_update_state_38")]; + tensor attention_19_key_cache_begin_0 = const()[name = string("attention_19_key_cache_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor attention_19_key_cache_end_0 = const()[name = string("attention_19_key_cache_end_0"), val = tensor([20, 2, 512, 64])]; + tensor attention_19_key_cache_squeeze_mask_0 = const()[name = string("attention_19_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_19_key_cache = slice_by_index(begin = attention_19_key_cache_begin_0, end = attention_19_key_cache_end_0, squeeze_mask = attention_19_key_cache_squeeze_mask_0, x = coreml_update_state_38)[name = string("attention_19_key_cache")]; + int32 attention_19_key_cache_head_axis_0 = const()[name = string("attention_19_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_19_key_cache_head_num_splits_0 = const()[name = string("attention_19_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_19_key_cache_head_0, tensor attention_19_key_cache_head_1 = split(axis = attention_19_key_cache_head_axis_0, num_splits = attention_19_key_cache_head_num_splits_0, x = attention_19_key_cache)[name = string("attention_19_key_cache_head")]; + tensor attention_19_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_19_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_19_updated_value_cache_0 = slice_update(begin = attention_19_update_begin_0, end = attention_19_update_end_0, squeeze_mask = attention_19_updated_value_cache_0_squeeze_mask_0, update = attention_19_split_qkv_heads_2, x = coreml_update_state_37)[name = string("attention_19_updated_value_cache_0")]; + write_state(data = attention_19_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_39_write_state")]; + tensor coreml_update_state_39 = read_state(input = value_cache_state)[name = string("coreml_update_state_39")]; + tensor attention_19_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_19_slice_current_layer_value_cache_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor attention_19_slice_current_layer_value_cache_end_0 = const()[name = string("attention_19_slice_current_layer_value_cache_end_0"), val = tensor([20, 2, 512, 64])]; + tensor attention_19_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_19_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_19_slice_current_layer_value_cache = slice_by_index(begin = attention_19_slice_current_layer_value_cache_begin_0, end = attention_19_slice_current_layer_value_cache_end_0, squeeze_mask = attention_19_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_39)[name = string("attention_19_slice_current_layer_value_cache")]; + int32 attention_19_slice_value_cache_heads_axis_0 = const()[name = string("attention_19_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_19_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_19_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_19_slice_value_cache_heads_0, tensor attention_19_slice_value_cache_heads_1 = split(axis = attention_19_slice_value_cache_heads_axis_0, num_splits = attention_19_slice_value_cache_heads_num_splits_0, x = attention_19_slice_current_layer_value_cache)[name = string("attention_19_slice_value_cache_heads")]; + bool attention_19_scores_0_transpose_y_0 = const()[name = string("attention_19_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_19_scores_0_transpose_x_0 = const()[name = string("attention_19_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_19_scores_0 = matmul(transpose_x = attention_19_scores_0_transpose_x_0, transpose_y = attention_19_scores_0_transpose_y_0, x = attention_19_key_cache_head_0, y = attention_19_q_splits_0)[name = string("attention_19_scores_0")]; + fp16 attention_19_scaled_scores_0_y_0 = const()[name = string("attention_19_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_19_scaled_scores_0 = mul(x = attention_19_scores_0, y = attention_19_scaled_scores_0_y_0)[name = string("attention_19_scaled_scores_0")]; + tensor attention_19_masked_scaled_scores_0 = add(x = attention_19_scaled_scores_0, y = transpose_0)[name = string("attention_19_masked_scaled_scores_0")]; + int32 softmax_38_axis_0 = const()[name = string("softmax_38_axis_0"), val = int32(-2)]; + tensor softmax_38 = softmax(axis = softmax_38_axis_0, x = attention_19_masked_scaled_scores_0)[name = string("softmax_38")]; + bool attention_19_attention_0_transpose_x_0 = const()[name = string("attention_19_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_19_attention_0_transpose_y_0 = const()[name = string("attention_19_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_19_attention_0 = matmul(transpose_x = attention_19_attention_0_transpose_x_0, transpose_y = attention_19_attention_0_transpose_y_0, x = softmax_38, y = attention_19_slice_value_cache_heads_0)[name = string("attention_19_attention_0")]; + bool attention_19_scores_1_transpose_y_0 = const()[name = string("attention_19_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_19_scores_1_transpose_x_0 = const()[name = string("attention_19_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_19_scores_1 = matmul(transpose_x = attention_19_scores_1_transpose_x_0, transpose_y = attention_19_scores_1_transpose_y_0, x = attention_19_key_cache_head_1, y = attention_19_q_splits_1)[name = string("attention_19_scores_1")]; + fp16 attention_19_scaled_scores_1_y_0 = const()[name = string("attention_19_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_19_scaled_scores_1 = mul(x = attention_19_scores_1, y = attention_19_scaled_scores_1_y_0)[name = string("attention_19_scaled_scores_1")]; + tensor attention_19_masked_scaled_scores_1 = add(x = attention_19_scaled_scores_1, y = transpose_0)[name = string("attention_19_masked_scaled_scores_1")]; + int32 softmax_39_axis_0 = const()[name = string("softmax_39_axis_0"), val = int32(-2)]; + tensor softmax_39 = softmax(axis = softmax_39_axis_0, x = attention_19_masked_scaled_scores_1)[name = string("softmax_39")]; + bool attention_19_attention_1_transpose_x_0 = const()[name = string("attention_19_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_19_attention_1_transpose_y_0 = const()[name = string("attention_19_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_19_attention_1 = matmul(transpose_x = attention_19_attention_1_transpose_x_0, transpose_y = attention_19_attention_1_transpose_y_0, x = softmax_39, y = attention_19_slice_value_cache_heads_1)[name = string("attention_19_attention_1")]; + int32 attention_19_concat_attention_all_heads_axis_0 = const()[name = string("attention_19_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_19_concat_attention_all_heads_interleave_0 = const()[name = string("attention_19_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_19_concat_attention_all_heads = concat(axis = attention_19_concat_attention_all_heads_axis_0, interleave = attention_19_concat_attention_all_heads_interleave_0, values = (attention_19_attention_0, attention_19_attention_1))[name = string("attention_19_concat_attention_all_heads")]; + tensor attention_19_channels_first_retransposed_perm_0 = const()[name = string("attention_19_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_19_reshaped_shape_0 = const()[name = string("attention_19_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_19_channels_first_retransposed = transpose(perm = attention_19_channels_first_retransposed_perm_0, x = attention_19_concat_attention_all_heads)[name = string("transpose_9")]; + tensor attention_19_reshaped = reshape(shape = attention_19_reshaped_shape_0, x = attention_19_channels_first_retransposed)[name = string("attention_19_reshaped")]; + tensor attention_19_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512727424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513329600))))[name = string("attention_19_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_91 = constexpr_blockwise_shift_scale(data = attention_19_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513358336))))[name = string("constexpr_blockwise_shift_scale_91")]; + tensor attention_19_outproj_strides_0 = const()[name = string("attention_19_outproj_strides_0"), val = tensor([1])]; + string attention_19_outproj_pad_type_0 = const()[name = string("attention_19_outproj_pad_type_0"), val = string("valid")]; + tensor attention_19_outproj_pad_0 = const()[name = string("attention_19_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_19_outproj_dilations_0 = const()[name = string("attention_19_outproj_dilations_0"), val = tensor([1])]; + int32 attention_19_outproj_groups_0 = const()[name = string("attention_19_outproj_groups_0"), val = int32(1)]; + tensor attention_19_outproj = conv(dilations = attention_19_outproj_dilations_0, groups = attention_19_outproj_groups_0, pad = attention_19_outproj_pad_0, pad_type = attention_19_outproj_pad_type_0, strides = attention_19_outproj_strides_0, weight = constexpr_blockwise_shift_scale_91, x = attention_19_reshaped)[name = string("attention_19_outproj")]; + tensor block_19_residual_1 = add(x = block_18_residual_2, y = attention_19_outproj)[name = string("block_19_residual_1")]; + tensor block_19_ffn_rmsnorm_abs = abs(x = block_19_residual_1)[name = string("block_19_ffn_rmsnorm_abs")]; + tensor block_19_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_19_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_19_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_19_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_19_ffn_rmsnorm_maxval = reduce_max(axes = block_19_ffn_rmsnorm_maxval_axes_0, keep_dims = block_19_ffn_rmsnorm_maxval_keep_dims_0, x = block_19_ffn_rmsnorm_abs)[name = string("block_19_ffn_rmsnorm_maxval")]; + fp16 block_19_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_19_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_19_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_19_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_19_ffn_rmsnorm_maxval_clipped = clip(alpha = block_19_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_19_ffn_rmsnorm_maxval_clipped_beta_0, x = block_19_ffn_rmsnorm_maxval)[name = string("block_19_ffn_rmsnorm_maxval_clipped")]; + tensor block_19_ffn_rmsnorm_scaled = real_div(x = block_19_residual_1, y = block_19_ffn_rmsnorm_maxval_clipped)[name = string("block_19_ffn_rmsnorm_scaled")]; + tensor block_19_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_19_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_19_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_19_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_19_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_19_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_19_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_19_ffn_rmsnorm_scaled)[name = string("block_19_ffn_rmsnorm_squared_sum")]; + fp16 block_19_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_19_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_19_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_19_ffn_rmsnorm_rsqrt_epsilon_0, x = block_19_ffn_rmsnorm_squared_sum)[name = string("block_19_ffn_rmsnorm_rsqrt")]; + fp16 block_19_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_19_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_19_ffn_rmsnorm_dim_scaled = mul(x = block_19_ffn_rmsnorm_scaled, y = block_19_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_19_ffn_rmsnorm_dim_scaled")]; + tensor block_19_ffn_rmsnorm_normalized = mul(x = block_19_ffn_rmsnorm_dim_scaled, y = block_19_ffn_rmsnorm_rsqrt)[name = string("block_19_ffn_rmsnorm_normalized")]; + tensor block_19_ffn_rmsnorm_y_0 = const()[name = string("block_19_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513360192)))]; + tensor block_19_ffn_rmsnorm = mul(x = block_19_ffn_rmsnorm_normalized, y = block_19_ffn_rmsnorm_y_0)[name = string("block_19_ffn_rmsnorm")]; + tensor block_19_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513362048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516630720))))[name = string("block_19_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_92 = constexpr_blockwise_shift_scale(data = block_19_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516786432))))[name = string("constexpr_blockwise_shift_scale_92")]; + tensor block_19_ffn_inproj_strides_0 = const()[name = string("block_19_ffn_inproj_strides_0"), val = tensor([1])]; + string block_19_ffn_inproj_pad_type_0 = const()[name = string("block_19_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_19_ffn_inproj_pad_0 = const()[name = string("block_19_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_19_ffn_inproj_dilations_0 = const()[name = string("block_19_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_19_ffn_inproj_groups_0 = const()[name = string("block_19_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_19_ffn_inproj = conv(dilations = block_19_ffn_inproj_dilations_0, groups = block_19_ffn_inproj_groups_0, pad = block_19_ffn_inproj_pad_0, pad_type = block_19_ffn_inproj_pad_type_0, strides = block_19_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_92, x = block_19_ffn_rmsnorm)[name = string("block_19_ffn_inproj")]; + tensor block_19_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516796224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(520064896))))[name = string("block_19_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_93 = constexpr_blockwise_shift_scale(data = block_19_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(520220608))))[name = string("constexpr_blockwise_shift_scale_93")]; + tensor block_19_ffn_g_strides_0 = const()[name = string("block_19_ffn_g_strides_0"), val = tensor([1])]; + string block_19_ffn_g_pad_type_0 = const()[name = string("block_19_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_19_ffn_g_pad_0 = const()[name = string("block_19_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_19_ffn_g_dilations_0 = const()[name = string("block_19_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_19_ffn_g_groups_0 = const()[name = string("block_19_ffn_g_groups_0"), val = int32(1)]; + tensor block_19_ffn_g = conv(dilations = block_19_ffn_g_dilations_0, groups = block_19_ffn_g_groups_0, pad = block_19_ffn_g_pad_0, pad_type = block_19_ffn_g_pad_type_0, strides = block_19_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_93, x = block_19_ffn_rmsnorm)[name = string("block_19_ffn_g")]; + tensor block_19_ffn_g_activation = silu(x = block_19_ffn_g)[name = string("block_19_ffn_g_activation")]; + tensor block_19_ffn_x_gated = mul(x = block_19_ffn_inproj, y = block_19_ffn_g_activation)[name = string("block_19_ffn_x_gated")]; + tensor block_19_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(520230400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523499072))))[name = string("block_19_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_94 = constexpr_blockwise_shift_scale(data = block_19_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523527808))))[name = string("constexpr_blockwise_shift_scale_94")]; + tensor block_19_ffn_outproj_strides_0 = const()[name = string("block_19_ffn_outproj_strides_0"), val = tensor([1])]; + string block_19_ffn_outproj_pad_type_0 = const()[name = string("block_19_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_19_ffn_outproj_pad_0 = const()[name = string("block_19_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_19_ffn_outproj_dilations_0 = const()[name = string("block_19_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_19_ffn_outproj_groups_0 = const()[name = string("block_19_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_19_ffn_outproj = conv(dilations = block_19_ffn_outproj_dilations_0, groups = block_19_ffn_outproj_groups_0, pad = block_19_ffn_outproj_pad_0, pad_type = block_19_ffn_outproj_pad_type_0, strides = block_19_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_94, x = block_19_ffn_x_gated)[name = string("block_19_ffn_outproj")]; + tensor block_19_residual_2 = add(x = block_19_ffn_outproj, y = block_19_residual_1)[name = string("block_19_residual_2")]; + tensor block_20_attention_rmsnorm_abs = abs(x = block_19_residual_2)[name = string("block_20_attention_rmsnorm_abs")]; + tensor block_20_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_20_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_20_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_20_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_20_attention_rmsnorm_maxval = reduce_max(axes = block_20_attention_rmsnorm_maxval_axes_0, keep_dims = block_20_attention_rmsnorm_maxval_keep_dims_0, x = block_20_attention_rmsnorm_abs)[name = string("block_20_attention_rmsnorm_maxval")]; + fp16 block_20_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_20_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_20_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_20_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_20_attention_rmsnorm_maxval_clipped = clip(alpha = block_20_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_20_attention_rmsnorm_maxval_clipped_beta_0, x = block_20_attention_rmsnorm_maxval)[name = string("block_20_attention_rmsnorm_maxval_clipped")]; + tensor block_20_attention_rmsnorm_scaled = real_div(x = block_19_residual_2, y = block_20_attention_rmsnorm_maxval_clipped)[name = string("block_20_attention_rmsnorm_scaled")]; + tensor block_20_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_20_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_20_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_20_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_20_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_20_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_20_attention_rmsnorm_squared_sum_keep_dims_0, x = block_20_attention_rmsnorm_scaled)[name = string("block_20_attention_rmsnorm_squared_sum")]; + fp16 block_20_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_20_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_20_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_20_attention_rmsnorm_rsqrt_epsilon_0, x = block_20_attention_rmsnorm_squared_sum)[name = string("block_20_attention_rmsnorm_rsqrt")]; + fp16 block_20_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_20_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_20_attention_rmsnorm_dim_scaled = mul(x = block_20_attention_rmsnorm_scaled, y = block_20_attention_rmsnorm_dim_scaled_y_0)[name = string("block_20_attention_rmsnorm_dim_scaled")]; + tensor block_20_attention_rmsnorm_normalized = mul(x = block_20_attention_rmsnorm_dim_scaled, y = block_20_attention_rmsnorm_rsqrt)[name = string("block_20_attention_rmsnorm_normalized")]; + tensor block_20_attention_rmsnorm_y_0 = const()[name = string("block_20_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523529664)))]; + tensor block_20_attention_rmsnorm = mul(x = block_20_attention_rmsnorm_normalized, y = block_20_attention_rmsnorm_y_0)[name = string("block_20_attention_rmsnorm")]; + tensor attention_20_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523531520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524305728))))[name = string("attention_20_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_95 = constexpr_blockwise_shift_scale(data = attention_20_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524342656))))[name = string("constexpr_blockwise_shift_scale_95")]; + tensor attention_20_qkvproj_bias_0 = const()[name = string("attention_20_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524345024)))]; + tensor attention_20_qkvproj_strides_0 = const()[name = string("attention_20_qkvproj_strides_0"), val = tensor([1])]; + string attention_20_qkvproj_pad_type_0 = const()[name = string("attention_20_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_20_qkvproj_pad_0 = const()[name = string("attention_20_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_20_qkvproj_dilations_0 = const()[name = string("attention_20_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_20_qkvproj_groups_0 = const()[name = string("attention_20_qkvproj_groups_0"), val = int32(1)]; + tensor attention_20_qkvproj = conv(bias = attention_20_qkvproj_bias_0, dilations = attention_20_qkvproj_dilations_0, groups = attention_20_qkvproj_groups_0, pad = attention_20_qkvproj_pad_0, pad_type = attention_20_qkvproj_pad_type_0, strides = attention_20_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_95, x = block_20_attention_rmsnorm)[name = string("attention_20_qkvproj")]; + tensor attention_20_head_reshape_shape_0 = const()[name = string("attention_20_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_20_head_reshape = reshape(shape = attention_20_head_reshape_shape_0, x = attention_20_qkvproj)[name = string("attention_20_head_reshape")]; + tensor attention_20_head_transpose_perm_0 = const()[name = string("attention_20_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_20_split_qkv_heads_axis_0 = const()[name = string("attention_20_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_20_split_qkv_heads_split_sizes_0 = const()[name = string("attention_20_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_20_head_transpose = transpose(perm = attention_20_head_transpose_perm_0, x = attention_20_head_reshape)[name = string("transpose_8")]; + tensor attention_20_split_qkv_heads_0, tensor attention_20_split_qkv_heads_1, tensor attention_20_split_qkv_heads_2 = split(axis = attention_20_split_qkv_heads_axis_0, split_sizes = attention_20_split_qkv_heads_split_sizes_0, x = attention_20_head_transpose)[name = string("attention_20_split_qkv_heads")]; + tensor attention_20_q_rope_lhs_mult = mul(x = attention_20_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_20_q_rope_lhs_mult")]; + int32 attention_20_q_rotate_half_split_num_splits_0 = const()[name = string("attention_20_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_20_q_rotate_half_split_axis_0 = const()[name = string("attention_20_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_20_q_rotate_half_split_0, tensor attention_20_q_rotate_half_split_1 = split(axis = attention_20_q_rotate_half_split_axis_0, num_splits = attention_20_q_rotate_half_split_num_splits_0, x = attention_20_split_qkv_heads_0)[name = string("attention_20_q_rotate_half_split")]; + fp16 attention_20_q_rotate_half_neg_y_0 = const()[name = string("attention_20_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_20_q_rotate_half_neg = mul(x = attention_20_q_rotate_half_split_1, y = attention_20_q_rotate_half_neg_y_0)[name = string("attention_20_q_rotate_half_neg")]; + int32 attention_20_q_rotate_half_concat_axis_0 = const()[name = string("attention_20_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_20_q_rotate_half_concat_interleave_0 = const()[name = string("attention_20_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_20_q_rotate_half_concat = concat(axis = attention_20_q_rotate_half_concat_axis_0, interleave = attention_20_q_rotate_half_concat_interleave_0, values = (attention_20_q_rotate_half_neg, attention_20_q_rotate_half_split_0))[name = string("attention_20_q_rotate_half_concat")]; + tensor attention_20_q_rope_rhs_mult = mul(x = attention_20_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_20_q_rope_rhs_mult")]; + tensor attention_20_q_rope = add(x = attention_20_q_rope_lhs_mult, y = attention_20_q_rope_rhs_mult)[name = string("attention_20_q_rope")]; + tensor attention_20_k_rope_lhs_mult = mul(x = attention_20_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_20_k_rope_lhs_mult")]; + int32 attention_20_k_rotate_half_split_num_splits_0 = const()[name = string("attention_20_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_20_k_rotate_half_split_axis_0 = const()[name = string("attention_20_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_20_k_rotate_half_split_0, tensor attention_20_k_rotate_half_split_1 = split(axis = attention_20_k_rotate_half_split_axis_0, num_splits = attention_20_k_rotate_half_split_num_splits_0, x = attention_20_split_qkv_heads_1)[name = string("attention_20_k_rotate_half_split")]; + fp16 attention_20_k_rotate_half_neg_y_0 = const()[name = string("attention_20_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_20_k_rotate_half_neg = mul(x = attention_20_k_rotate_half_split_1, y = attention_20_k_rotate_half_neg_y_0)[name = string("attention_20_k_rotate_half_neg")]; + int32 attention_20_k_rotate_half_concat_axis_0 = const()[name = string("attention_20_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_20_k_rotate_half_concat_interleave_0 = const()[name = string("attention_20_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_20_k_rotate_half_concat = concat(axis = attention_20_k_rotate_half_concat_axis_0, interleave = attention_20_k_rotate_half_concat_interleave_0, values = (attention_20_k_rotate_half_neg, attention_20_k_rotate_half_split_0))[name = string("attention_20_k_rotate_half_concat")]; + tensor attention_20_k_rope_rhs_mult = mul(x = attention_20_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_20_k_rope_rhs_mult")]; + tensor attention_20_k_rope = add(x = attention_20_k_rope_lhs_mult, y = attention_20_k_rope_rhs_mult)[name = string("attention_20_k_rope")]; + int32 attention_20_q_splits_axis_0 = const()[name = string("attention_20_q_splits_axis_0"), val = int32(1)]; + int32 attention_20_q_splits_num_splits_0 = const()[name = string("attention_20_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_20_q_splits_0, tensor attention_20_q_splits_1 = split(axis = attention_20_q_splits_axis_0, num_splits = attention_20_q_splits_num_splits_0, x = attention_20_q_rope)[name = string("attention_20_q_splits")]; + tensor attention_20_update_begin_0_values0_0 = const()[name = string("attention_20_update_begin_0_values0_0"), val = tensor([20])]; + tensor attention_20_update_begin_0_values1_0 = const()[name = string("attention_20_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_20_update_begin_0_values3_0 = const()[name = string("attention_20_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_20_update_begin_0_axis_0 = const()[name = string("attention_20_update_begin_0_axis_0"), val = int32(0)]; + bool attention_20_update_begin_0_interleave_0 = const()[name = string("attention_20_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_20_update_begin_0 = concat(axis = attention_20_update_begin_0_axis_0, interleave = attention_20_update_begin_0_interleave_0, values = (attention_20_update_begin_0_values0_0, attention_20_update_begin_0_values1_0, query_pos1, attention_20_update_begin_0_values3_0))[name = string("attention_20_update_begin_0")]; + tensor attention_20_update_end_0_values0_0 = const()[name = string("attention_20_update_end_0_values0_0"), val = tensor([21])]; + tensor attention_20_update_end_0_values1_0 = const()[name = string("attention_20_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_20_update_end_0_values3_0 = const()[name = string("attention_20_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_20_update_end_0_axis_0 = const()[name = string("attention_20_update_end_0_axis_0"), val = int32(0)]; + bool attention_20_update_end_0_interleave_0 = const()[name = string("attention_20_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_20_update_end_0 = concat(axis = attention_20_update_end_0_axis_0, interleave = attention_20_update_end_0_interleave_0, values = (attention_20_update_end_0_values0_0, attention_20_update_end_0_values1_0, end_pos_0, attention_20_update_end_0_values3_0))[name = string("attention_20_update_end_0")]; + tensor attention_20_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_20_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_20_updated_key_cache_0 = slice_update(begin = attention_20_update_begin_0, end = attention_20_update_end_0, squeeze_mask = attention_20_updated_key_cache_0_squeeze_mask_0, update = attention_20_k_rope, x = coreml_update_state_38)[name = string("attention_20_updated_key_cache_0")]; + write_state(data = attention_20_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_40_write_state")]; + tensor coreml_update_state_40 = read_state(input = key_cache_state)[name = string("coreml_update_state_40")]; + tensor attention_20_key_cache_begin_0 = const()[name = string("attention_20_key_cache_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor attention_20_key_cache_end_0 = const()[name = string("attention_20_key_cache_end_0"), val = tensor([21, 2, 512, 64])]; + tensor attention_20_key_cache_squeeze_mask_0 = const()[name = string("attention_20_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_20_key_cache = slice_by_index(begin = attention_20_key_cache_begin_0, end = attention_20_key_cache_end_0, squeeze_mask = attention_20_key_cache_squeeze_mask_0, x = coreml_update_state_40)[name = string("attention_20_key_cache")]; + int32 attention_20_key_cache_head_axis_0 = const()[name = string("attention_20_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_20_key_cache_head_num_splits_0 = const()[name = string("attention_20_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_20_key_cache_head_0, tensor attention_20_key_cache_head_1 = split(axis = attention_20_key_cache_head_axis_0, num_splits = attention_20_key_cache_head_num_splits_0, x = attention_20_key_cache)[name = string("attention_20_key_cache_head")]; + tensor attention_20_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_20_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_20_updated_value_cache_0 = slice_update(begin = attention_20_update_begin_0, end = attention_20_update_end_0, squeeze_mask = attention_20_updated_value_cache_0_squeeze_mask_0, update = attention_20_split_qkv_heads_2, x = coreml_update_state_39)[name = string("attention_20_updated_value_cache_0")]; + write_state(data = attention_20_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_41_write_state")]; + tensor coreml_update_state_41 = read_state(input = value_cache_state)[name = string("coreml_update_state_41")]; + tensor attention_20_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_20_slice_current_layer_value_cache_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor attention_20_slice_current_layer_value_cache_end_0 = const()[name = string("attention_20_slice_current_layer_value_cache_end_0"), val = tensor([21, 2, 512, 64])]; + tensor attention_20_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_20_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_20_slice_current_layer_value_cache = slice_by_index(begin = attention_20_slice_current_layer_value_cache_begin_0, end = attention_20_slice_current_layer_value_cache_end_0, squeeze_mask = attention_20_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_41)[name = string("attention_20_slice_current_layer_value_cache")]; + int32 attention_20_slice_value_cache_heads_axis_0 = const()[name = string("attention_20_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_20_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_20_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_20_slice_value_cache_heads_0, tensor attention_20_slice_value_cache_heads_1 = split(axis = attention_20_slice_value_cache_heads_axis_0, num_splits = attention_20_slice_value_cache_heads_num_splits_0, x = attention_20_slice_current_layer_value_cache)[name = string("attention_20_slice_value_cache_heads")]; + bool attention_20_scores_0_transpose_y_0 = const()[name = string("attention_20_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_20_scores_0_transpose_x_0 = const()[name = string("attention_20_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_20_scores_0 = matmul(transpose_x = attention_20_scores_0_transpose_x_0, transpose_y = attention_20_scores_0_transpose_y_0, x = attention_20_key_cache_head_0, y = attention_20_q_splits_0)[name = string("attention_20_scores_0")]; + fp16 attention_20_scaled_scores_0_y_0 = const()[name = string("attention_20_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_20_scaled_scores_0 = mul(x = attention_20_scores_0, y = attention_20_scaled_scores_0_y_0)[name = string("attention_20_scaled_scores_0")]; + tensor attention_20_masked_scaled_scores_0 = add(x = attention_20_scaled_scores_0, y = transpose_0)[name = string("attention_20_masked_scaled_scores_0")]; + int32 softmax_40_axis_0 = const()[name = string("softmax_40_axis_0"), val = int32(-2)]; + tensor softmax_40 = softmax(axis = softmax_40_axis_0, x = attention_20_masked_scaled_scores_0)[name = string("softmax_40")]; + bool attention_20_attention_0_transpose_x_0 = const()[name = string("attention_20_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_20_attention_0_transpose_y_0 = const()[name = string("attention_20_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_20_attention_0 = matmul(transpose_x = attention_20_attention_0_transpose_x_0, transpose_y = attention_20_attention_0_transpose_y_0, x = softmax_40, y = attention_20_slice_value_cache_heads_0)[name = string("attention_20_attention_0")]; + bool attention_20_scores_1_transpose_y_0 = const()[name = string("attention_20_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_20_scores_1_transpose_x_0 = const()[name = string("attention_20_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_20_scores_1 = matmul(transpose_x = attention_20_scores_1_transpose_x_0, transpose_y = attention_20_scores_1_transpose_y_0, x = attention_20_key_cache_head_1, y = attention_20_q_splits_1)[name = string("attention_20_scores_1")]; + fp16 attention_20_scaled_scores_1_y_0 = const()[name = string("attention_20_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_20_scaled_scores_1 = mul(x = attention_20_scores_1, y = attention_20_scaled_scores_1_y_0)[name = string("attention_20_scaled_scores_1")]; + tensor attention_20_masked_scaled_scores_1 = add(x = attention_20_scaled_scores_1, y = transpose_0)[name = string("attention_20_masked_scaled_scores_1")]; + int32 softmax_41_axis_0 = const()[name = string("softmax_41_axis_0"), val = int32(-2)]; + tensor softmax_41 = softmax(axis = softmax_41_axis_0, x = attention_20_masked_scaled_scores_1)[name = string("softmax_41")]; + bool attention_20_attention_1_transpose_x_0 = const()[name = string("attention_20_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_20_attention_1_transpose_y_0 = const()[name = string("attention_20_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_20_attention_1 = matmul(transpose_x = attention_20_attention_1_transpose_x_0, transpose_y = attention_20_attention_1_transpose_y_0, x = softmax_41, y = attention_20_slice_value_cache_heads_1)[name = string("attention_20_attention_1")]; + int32 attention_20_concat_attention_all_heads_axis_0 = const()[name = string("attention_20_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_20_concat_attention_all_heads_interleave_0 = const()[name = string("attention_20_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_20_concat_attention_all_heads = concat(axis = attention_20_concat_attention_all_heads_axis_0, interleave = attention_20_concat_attention_all_heads_interleave_0, values = (attention_20_attention_0, attention_20_attention_1))[name = string("attention_20_concat_attention_all_heads")]; + tensor attention_20_channels_first_retransposed_perm_0 = const()[name = string("attention_20_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_20_reshaped_shape_0 = const()[name = string("attention_20_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_20_channels_first_retransposed = transpose(perm = attention_20_channels_first_retransposed_perm_0, x = attention_20_concat_attention_all_heads)[name = string("transpose_7")]; + tensor attention_20_reshaped = reshape(shape = attention_20_reshaped_shape_0, x = attention_20_channels_first_retransposed)[name = string("attention_20_reshaped")]; + tensor attention_20_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524347392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524949568))))[name = string("attention_20_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_96 = constexpr_blockwise_shift_scale(data = attention_20_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524978304))))[name = string("constexpr_blockwise_shift_scale_96")]; + tensor attention_20_outproj_strides_0 = const()[name = string("attention_20_outproj_strides_0"), val = tensor([1])]; + string attention_20_outproj_pad_type_0 = const()[name = string("attention_20_outproj_pad_type_0"), val = string("valid")]; + tensor attention_20_outproj_pad_0 = const()[name = string("attention_20_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_20_outproj_dilations_0 = const()[name = string("attention_20_outproj_dilations_0"), val = tensor([1])]; + int32 attention_20_outproj_groups_0 = const()[name = string("attention_20_outproj_groups_0"), val = int32(1)]; + tensor attention_20_outproj = conv(dilations = attention_20_outproj_dilations_0, groups = attention_20_outproj_groups_0, pad = attention_20_outproj_pad_0, pad_type = attention_20_outproj_pad_type_0, strides = attention_20_outproj_strides_0, weight = constexpr_blockwise_shift_scale_96, x = attention_20_reshaped)[name = string("attention_20_outproj")]; + tensor block_20_residual_1 = add(x = block_19_residual_2, y = attention_20_outproj)[name = string("block_20_residual_1")]; + tensor block_20_ffn_rmsnorm_abs = abs(x = block_20_residual_1)[name = string("block_20_ffn_rmsnorm_abs")]; + tensor block_20_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_20_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_20_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_20_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_20_ffn_rmsnorm_maxval = reduce_max(axes = block_20_ffn_rmsnorm_maxval_axes_0, keep_dims = block_20_ffn_rmsnorm_maxval_keep_dims_0, x = block_20_ffn_rmsnorm_abs)[name = string("block_20_ffn_rmsnorm_maxval")]; + fp16 block_20_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_20_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_20_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_20_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_20_ffn_rmsnorm_maxval_clipped = clip(alpha = block_20_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_20_ffn_rmsnorm_maxval_clipped_beta_0, x = block_20_ffn_rmsnorm_maxval)[name = string("block_20_ffn_rmsnorm_maxval_clipped")]; + tensor block_20_ffn_rmsnorm_scaled = real_div(x = block_20_residual_1, y = block_20_ffn_rmsnorm_maxval_clipped)[name = string("block_20_ffn_rmsnorm_scaled")]; + tensor block_20_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_20_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_20_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_20_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_20_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_20_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_20_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_20_ffn_rmsnorm_scaled)[name = string("block_20_ffn_rmsnorm_squared_sum")]; + fp16 block_20_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_20_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_20_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_20_ffn_rmsnorm_rsqrt_epsilon_0, x = block_20_ffn_rmsnorm_squared_sum)[name = string("block_20_ffn_rmsnorm_rsqrt")]; + fp16 block_20_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_20_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_20_ffn_rmsnorm_dim_scaled = mul(x = block_20_ffn_rmsnorm_scaled, y = block_20_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_20_ffn_rmsnorm_dim_scaled")]; + tensor block_20_ffn_rmsnorm_normalized = mul(x = block_20_ffn_rmsnorm_dim_scaled, y = block_20_ffn_rmsnorm_rsqrt)[name = string("block_20_ffn_rmsnorm_normalized")]; + tensor block_20_ffn_rmsnorm_y_0 = const()[name = string("block_20_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524980160)))]; + tensor block_20_ffn_rmsnorm = mul(x = block_20_ffn_rmsnorm_normalized, y = block_20_ffn_rmsnorm_y_0)[name = string("block_20_ffn_rmsnorm")]; + tensor block_20_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524982016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528250688))))[name = string("block_20_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_97 = constexpr_blockwise_shift_scale(data = block_20_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528406400))))[name = string("constexpr_blockwise_shift_scale_97")]; + tensor block_20_ffn_inproj_strides_0 = const()[name = string("block_20_ffn_inproj_strides_0"), val = tensor([1])]; + string block_20_ffn_inproj_pad_type_0 = const()[name = string("block_20_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_20_ffn_inproj_pad_0 = const()[name = string("block_20_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_20_ffn_inproj_dilations_0 = const()[name = string("block_20_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_20_ffn_inproj_groups_0 = const()[name = string("block_20_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_20_ffn_inproj = conv(dilations = block_20_ffn_inproj_dilations_0, groups = block_20_ffn_inproj_groups_0, pad = block_20_ffn_inproj_pad_0, pad_type = block_20_ffn_inproj_pad_type_0, strides = block_20_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_97, x = block_20_ffn_rmsnorm)[name = string("block_20_ffn_inproj")]; + tensor block_20_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528416192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531684864))))[name = string("block_20_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_98 = constexpr_blockwise_shift_scale(data = block_20_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531840576))))[name = string("constexpr_blockwise_shift_scale_98")]; + tensor block_20_ffn_g_strides_0 = const()[name = string("block_20_ffn_g_strides_0"), val = tensor([1])]; + string block_20_ffn_g_pad_type_0 = const()[name = string("block_20_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_20_ffn_g_pad_0 = const()[name = string("block_20_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_20_ffn_g_dilations_0 = const()[name = string("block_20_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_20_ffn_g_groups_0 = const()[name = string("block_20_ffn_g_groups_0"), val = int32(1)]; + tensor block_20_ffn_g = conv(dilations = block_20_ffn_g_dilations_0, groups = block_20_ffn_g_groups_0, pad = block_20_ffn_g_pad_0, pad_type = block_20_ffn_g_pad_type_0, strides = block_20_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_98, x = block_20_ffn_rmsnorm)[name = string("block_20_ffn_g")]; + tensor block_20_ffn_g_activation = silu(x = block_20_ffn_g)[name = string("block_20_ffn_g_activation")]; + tensor block_20_ffn_x_gated = mul(x = block_20_ffn_inproj, y = block_20_ffn_g_activation)[name = string("block_20_ffn_x_gated")]; + tensor block_20_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531850368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535119040))))[name = string("block_20_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_99 = constexpr_blockwise_shift_scale(data = block_20_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535147776))))[name = string("constexpr_blockwise_shift_scale_99")]; + tensor block_20_ffn_outproj_strides_0 = const()[name = string("block_20_ffn_outproj_strides_0"), val = tensor([1])]; + string block_20_ffn_outproj_pad_type_0 = const()[name = string("block_20_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_20_ffn_outproj_pad_0 = const()[name = string("block_20_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_20_ffn_outproj_dilations_0 = const()[name = string("block_20_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_20_ffn_outproj_groups_0 = const()[name = string("block_20_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_20_ffn_outproj = conv(dilations = block_20_ffn_outproj_dilations_0, groups = block_20_ffn_outproj_groups_0, pad = block_20_ffn_outproj_pad_0, pad_type = block_20_ffn_outproj_pad_type_0, strides = block_20_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_99, x = block_20_ffn_x_gated)[name = string("block_20_ffn_outproj")]; + tensor block_20_residual_2 = add(x = block_20_ffn_outproj, y = block_20_residual_1)[name = string("block_20_residual_2")]; + tensor block_21_attention_rmsnorm_abs = abs(x = block_20_residual_2)[name = string("block_21_attention_rmsnorm_abs")]; + tensor block_21_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_21_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_21_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_21_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_21_attention_rmsnorm_maxval = reduce_max(axes = block_21_attention_rmsnorm_maxval_axes_0, keep_dims = block_21_attention_rmsnorm_maxval_keep_dims_0, x = block_21_attention_rmsnorm_abs)[name = string("block_21_attention_rmsnorm_maxval")]; + fp16 block_21_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_21_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_21_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_21_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_21_attention_rmsnorm_maxval_clipped = clip(alpha = block_21_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_21_attention_rmsnorm_maxval_clipped_beta_0, x = block_21_attention_rmsnorm_maxval)[name = string("block_21_attention_rmsnorm_maxval_clipped")]; + tensor block_21_attention_rmsnorm_scaled = real_div(x = block_20_residual_2, y = block_21_attention_rmsnorm_maxval_clipped)[name = string("block_21_attention_rmsnorm_scaled")]; + tensor block_21_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_21_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_21_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_21_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_21_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_21_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_21_attention_rmsnorm_squared_sum_keep_dims_0, x = block_21_attention_rmsnorm_scaled)[name = string("block_21_attention_rmsnorm_squared_sum")]; + fp16 block_21_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_21_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_21_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_21_attention_rmsnorm_rsqrt_epsilon_0, x = block_21_attention_rmsnorm_squared_sum)[name = string("block_21_attention_rmsnorm_rsqrt")]; + fp16 block_21_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_21_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_21_attention_rmsnorm_dim_scaled = mul(x = block_21_attention_rmsnorm_scaled, y = block_21_attention_rmsnorm_dim_scaled_y_0)[name = string("block_21_attention_rmsnorm_dim_scaled")]; + tensor block_21_attention_rmsnorm_normalized = mul(x = block_21_attention_rmsnorm_dim_scaled, y = block_21_attention_rmsnorm_rsqrt)[name = string("block_21_attention_rmsnorm_normalized")]; + tensor block_21_attention_rmsnorm_y_0 = const()[name = string("block_21_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535149632)))]; + tensor block_21_attention_rmsnorm = mul(x = block_21_attention_rmsnorm_normalized, y = block_21_attention_rmsnorm_y_0)[name = string("block_21_attention_rmsnorm")]; + tensor attention_21_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535151488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535925696))))[name = string("attention_21_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_100 = constexpr_blockwise_shift_scale(data = attention_21_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535962624))))[name = string("constexpr_blockwise_shift_scale_100")]; + tensor attention_21_qkvproj_bias_0 = const()[name = string("attention_21_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535964992)))]; + tensor attention_21_qkvproj_strides_0 = const()[name = string("attention_21_qkvproj_strides_0"), val = tensor([1])]; + string attention_21_qkvproj_pad_type_0 = const()[name = string("attention_21_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_21_qkvproj_pad_0 = const()[name = string("attention_21_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_21_qkvproj_dilations_0 = const()[name = string("attention_21_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_21_qkvproj_groups_0 = const()[name = string("attention_21_qkvproj_groups_0"), val = int32(1)]; + tensor attention_21_qkvproj = conv(bias = attention_21_qkvproj_bias_0, dilations = attention_21_qkvproj_dilations_0, groups = attention_21_qkvproj_groups_0, pad = attention_21_qkvproj_pad_0, pad_type = attention_21_qkvproj_pad_type_0, strides = attention_21_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_100, x = block_21_attention_rmsnorm)[name = string("attention_21_qkvproj")]; + tensor attention_21_head_reshape_shape_0 = const()[name = string("attention_21_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_21_head_reshape = reshape(shape = attention_21_head_reshape_shape_0, x = attention_21_qkvproj)[name = string("attention_21_head_reshape")]; + tensor attention_21_head_transpose_perm_0 = const()[name = string("attention_21_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_21_split_qkv_heads_axis_0 = const()[name = string("attention_21_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_21_split_qkv_heads_split_sizes_0 = const()[name = string("attention_21_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_21_head_transpose = transpose(perm = attention_21_head_transpose_perm_0, x = attention_21_head_reshape)[name = string("transpose_6")]; + tensor attention_21_split_qkv_heads_0, tensor attention_21_split_qkv_heads_1, tensor attention_21_split_qkv_heads_2 = split(axis = attention_21_split_qkv_heads_axis_0, split_sizes = attention_21_split_qkv_heads_split_sizes_0, x = attention_21_head_transpose)[name = string("attention_21_split_qkv_heads")]; + tensor attention_21_q_rope_lhs_mult = mul(x = attention_21_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_21_q_rope_lhs_mult")]; + int32 attention_21_q_rotate_half_split_num_splits_0 = const()[name = string("attention_21_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_21_q_rotate_half_split_axis_0 = const()[name = string("attention_21_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_21_q_rotate_half_split_0, tensor attention_21_q_rotate_half_split_1 = split(axis = attention_21_q_rotate_half_split_axis_0, num_splits = attention_21_q_rotate_half_split_num_splits_0, x = attention_21_split_qkv_heads_0)[name = string("attention_21_q_rotate_half_split")]; + fp16 attention_21_q_rotate_half_neg_y_0 = const()[name = string("attention_21_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_21_q_rotate_half_neg = mul(x = attention_21_q_rotate_half_split_1, y = attention_21_q_rotate_half_neg_y_0)[name = string("attention_21_q_rotate_half_neg")]; + int32 attention_21_q_rotate_half_concat_axis_0 = const()[name = string("attention_21_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_21_q_rotate_half_concat_interleave_0 = const()[name = string("attention_21_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_21_q_rotate_half_concat = concat(axis = attention_21_q_rotate_half_concat_axis_0, interleave = attention_21_q_rotate_half_concat_interleave_0, values = (attention_21_q_rotate_half_neg, attention_21_q_rotate_half_split_0))[name = string("attention_21_q_rotate_half_concat")]; + tensor attention_21_q_rope_rhs_mult = mul(x = attention_21_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_21_q_rope_rhs_mult")]; + tensor attention_21_q_rope = add(x = attention_21_q_rope_lhs_mult, y = attention_21_q_rope_rhs_mult)[name = string("attention_21_q_rope")]; + tensor attention_21_k_rope_lhs_mult = mul(x = attention_21_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_21_k_rope_lhs_mult")]; + int32 attention_21_k_rotate_half_split_num_splits_0 = const()[name = string("attention_21_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_21_k_rotate_half_split_axis_0 = const()[name = string("attention_21_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_21_k_rotate_half_split_0, tensor attention_21_k_rotate_half_split_1 = split(axis = attention_21_k_rotate_half_split_axis_0, num_splits = attention_21_k_rotate_half_split_num_splits_0, x = attention_21_split_qkv_heads_1)[name = string("attention_21_k_rotate_half_split")]; + fp16 attention_21_k_rotate_half_neg_y_0 = const()[name = string("attention_21_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_21_k_rotate_half_neg = mul(x = attention_21_k_rotate_half_split_1, y = attention_21_k_rotate_half_neg_y_0)[name = string("attention_21_k_rotate_half_neg")]; + int32 attention_21_k_rotate_half_concat_axis_0 = const()[name = string("attention_21_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_21_k_rotate_half_concat_interleave_0 = const()[name = string("attention_21_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_21_k_rotate_half_concat = concat(axis = attention_21_k_rotate_half_concat_axis_0, interleave = attention_21_k_rotate_half_concat_interleave_0, values = (attention_21_k_rotate_half_neg, attention_21_k_rotate_half_split_0))[name = string("attention_21_k_rotate_half_concat")]; + tensor attention_21_k_rope_rhs_mult = mul(x = attention_21_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_21_k_rope_rhs_mult")]; + tensor attention_21_k_rope = add(x = attention_21_k_rope_lhs_mult, y = attention_21_k_rope_rhs_mult)[name = string("attention_21_k_rope")]; + int32 attention_21_q_splits_axis_0 = const()[name = string("attention_21_q_splits_axis_0"), val = int32(1)]; + int32 attention_21_q_splits_num_splits_0 = const()[name = string("attention_21_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_21_q_splits_0, tensor attention_21_q_splits_1 = split(axis = attention_21_q_splits_axis_0, num_splits = attention_21_q_splits_num_splits_0, x = attention_21_q_rope)[name = string("attention_21_q_splits")]; + tensor attention_21_update_begin_0_values0_0 = const()[name = string("attention_21_update_begin_0_values0_0"), val = tensor([21])]; + tensor attention_21_update_begin_0_values1_0 = const()[name = string("attention_21_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_21_update_begin_0_values3_0 = const()[name = string("attention_21_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_21_update_begin_0_axis_0 = const()[name = string("attention_21_update_begin_0_axis_0"), val = int32(0)]; + bool attention_21_update_begin_0_interleave_0 = const()[name = string("attention_21_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_21_update_begin_0 = concat(axis = attention_21_update_begin_0_axis_0, interleave = attention_21_update_begin_0_interleave_0, values = (attention_21_update_begin_0_values0_0, attention_21_update_begin_0_values1_0, query_pos1, attention_21_update_begin_0_values3_0))[name = string("attention_21_update_begin_0")]; + tensor attention_21_update_end_0_values0_0 = const()[name = string("attention_21_update_end_0_values0_0"), val = tensor([22])]; + tensor attention_21_update_end_0_values1_0 = const()[name = string("attention_21_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_21_update_end_0_values3_0 = const()[name = string("attention_21_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_21_update_end_0_axis_0 = const()[name = string("attention_21_update_end_0_axis_0"), val = int32(0)]; + bool attention_21_update_end_0_interleave_0 = const()[name = string("attention_21_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_21_update_end_0 = concat(axis = attention_21_update_end_0_axis_0, interleave = attention_21_update_end_0_interleave_0, values = (attention_21_update_end_0_values0_0, attention_21_update_end_0_values1_0, end_pos_0, attention_21_update_end_0_values3_0))[name = string("attention_21_update_end_0")]; + tensor attention_21_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_21_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_21_updated_key_cache_0 = slice_update(begin = attention_21_update_begin_0, end = attention_21_update_end_0, squeeze_mask = attention_21_updated_key_cache_0_squeeze_mask_0, update = attention_21_k_rope, x = coreml_update_state_40)[name = string("attention_21_updated_key_cache_0")]; + write_state(data = attention_21_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_42_write_state")]; + tensor coreml_update_state_42 = read_state(input = key_cache_state)[name = string("coreml_update_state_42")]; + tensor attention_21_key_cache_begin_0 = const()[name = string("attention_21_key_cache_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor attention_21_key_cache_end_0 = const()[name = string("attention_21_key_cache_end_0"), val = tensor([22, 2, 512, 64])]; + tensor attention_21_key_cache_squeeze_mask_0 = const()[name = string("attention_21_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_21_key_cache = slice_by_index(begin = attention_21_key_cache_begin_0, end = attention_21_key_cache_end_0, squeeze_mask = attention_21_key_cache_squeeze_mask_0, x = coreml_update_state_42)[name = string("attention_21_key_cache")]; + int32 attention_21_key_cache_head_axis_0 = const()[name = string("attention_21_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_21_key_cache_head_num_splits_0 = const()[name = string("attention_21_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_21_key_cache_head_0, tensor attention_21_key_cache_head_1 = split(axis = attention_21_key_cache_head_axis_0, num_splits = attention_21_key_cache_head_num_splits_0, x = attention_21_key_cache)[name = string("attention_21_key_cache_head")]; + tensor attention_21_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_21_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_21_updated_value_cache_0 = slice_update(begin = attention_21_update_begin_0, end = attention_21_update_end_0, squeeze_mask = attention_21_updated_value_cache_0_squeeze_mask_0, update = attention_21_split_qkv_heads_2, x = coreml_update_state_41)[name = string("attention_21_updated_value_cache_0")]; + write_state(data = attention_21_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_43_write_state")]; + tensor coreml_update_state_43 = read_state(input = value_cache_state)[name = string("coreml_update_state_43")]; + tensor attention_21_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_21_slice_current_layer_value_cache_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor attention_21_slice_current_layer_value_cache_end_0 = const()[name = string("attention_21_slice_current_layer_value_cache_end_0"), val = tensor([22, 2, 512, 64])]; + tensor attention_21_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_21_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_21_slice_current_layer_value_cache = slice_by_index(begin = attention_21_slice_current_layer_value_cache_begin_0, end = attention_21_slice_current_layer_value_cache_end_0, squeeze_mask = attention_21_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_43)[name = string("attention_21_slice_current_layer_value_cache")]; + int32 attention_21_slice_value_cache_heads_axis_0 = const()[name = string("attention_21_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_21_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_21_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_21_slice_value_cache_heads_0, tensor attention_21_slice_value_cache_heads_1 = split(axis = attention_21_slice_value_cache_heads_axis_0, num_splits = attention_21_slice_value_cache_heads_num_splits_0, x = attention_21_slice_current_layer_value_cache)[name = string("attention_21_slice_value_cache_heads")]; + bool attention_21_scores_0_transpose_y_0 = const()[name = string("attention_21_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_21_scores_0_transpose_x_0 = const()[name = string("attention_21_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_21_scores_0 = matmul(transpose_x = attention_21_scores_0_transpose_x_0, transpose_y = attention_21_scores_0_transpose_y_0, x = attention_21_key_cache_head_0, y = attention_21_q_splits_0)[name = string("attention_21_scores_0")]; + fp16 attention_21_scaled_scores_0_y_0 = const()[name = string("attention_21_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_21_scaled_scores_0 = mul(x = attention_21_scores_0, y = attention_21_scaled_scores_0_y_0)[name = string("attention_21_scaled_scores_0")]; + tensor attention_21_masked_scaled_scores_0 = add(x = attention_21_scaled_scores_0, y = transpose_0)[name = string("attention_21_masked_scaled_scores_0")]; + int32 softmax_42_axis_0 = const()[name = string("softmax_42_axis_0"), val = int32(-2)]; + tensor softmax_42 = softmax(axis = softmax_42_axis_0, x = attention_21_masked_scaled_scores_0)[name = string("softmax_42")]; + bool attention_21_attention_0_transpose_x_0 = const()[name = string("attention_21_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_21_attention_0_transpose_y_0 = const()[name = string("attention_21_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_21_attention_0 = matmul(transpose_x = attention_21_attention_0_transpose_x_0, transpose_y = attention_21_attention_0_transpose_y_0, x = softmax_42, y = attention_21_slice_value_cache_heads_0)[name = string("attention_21_attention_0")]; + bool attention_21_scores_1_transpose_y_0 = const()[name = string("attention_21_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_21_scores_1_transpose_x_0 = const()[name = string("attention_21_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_21_scores_1 = matmul(transpose_x = attention_21_scores_1_transpose_x_0, transpose_y = attention_21_scores_1_transpose_y_0, x = attention_21_key_cache_head_1, y = attention_21_q_splits_1)[name = string("attention_21_scores_1")]; + fp16 attention_21_scaled_scores_1_y_0 = const()[name = string("attention_21_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_21_scaled_scores_1 = mul(x = attention_21_scores_1, y = attention_21_scaled_scores_1_y_0)[name = string("attention_21_scaled_scores_1")]; + tensor attention_21_masked_scaled_scores_1 = add(x = attention_21_scaled_scores_1, y = transpose_0)[name = string("attention_21_masked_scaled_scores_1")]; + int32 softmax_43_axis_0 = const()[name = string("softmax_43_axis_0"), val = int32(-2)]; + tensor softmax_43 = softmax(axis = softmax_43_axis_0, x = attention_21_masked_scaled_scores_1)[name = string("softmax_43")]; + bool attention_21_attention_1_transpose_x_0 = const()[name = string("attention_21_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_21_attention_1_transpose_y_0 = const()[name = string("attention_21_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_21_attention_1 = matmul(transpose_x = attention_21_attention_1_transpose_x_0, transpose_y = attention_21_attention_1_transpose_y_0, x = softmax_43, y = attention_21_slice_value_cache_heads_1)[name = string("attention_21_attention_1")]; + int32 attention_21_concat_attention_all_heads_axis_0 = const()[name = string("attention_21_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_21_concat_attention_all_heads_interleave_0 = const()[name = string("attention_21_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_21_concat_attention_all_heads = concat(axis = attention_21_concat_attention_all_heads_axis_0, interleave = attention_21_concat_attention_all_heads_interleave_0, values = (attention_21_attention_0, attention_21_attention_1))[name = string("attention_21_concat_attention_all_heads")]; + tensor attention_21_channels_first_retransposed_perm_0 = const()[name = string("attention_21_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_21_reshaped_shape_0 = const()[name = string("attention_21_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_21_channels_first_retransposed = transpose(perm = attention_21_channels_first_retransposed_perm_0, x = attention_21_concat_attention_all_heads)[name = string("transpose_5")]; + tensor attention_21_reshaped = reshape(shape = attention_21_reshaped_shape_0, x = attention_21_channels_first_retransposed)[name = string("attention_21_reshaped")]; + tensor attention_21_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535967360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536569536))))[name = string("attention_21_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_101 = constexpr_blockwise_shift_scale(data = attention_21_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536598272))))[name = string("constexpr_blockwise_shift_scale_101")]; + tensor attention_21_outproj_strides_0 = const()[name = string("attention_21_outproj_strides_0"), val = tensor([1])]; + string attention_21_outproj_pad_type_0 = const()[name = string("attention_21_outproj_pad_type_0"), val = string("valid")]; + tensor attention_21_outproj_pad_0 = const()[name = string("attention_21_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_21_outproj_dilations_0 = const()[name = string("attention_21_outproj_dilations_0"), val = tensor([1])]; + int32 attention_21_outproj_groups_0 = const()[name = string("attention_21_outproj_groups_0"), val = int32(1)]; + tensor attention_21_outproj = conv(dilations = attention_21_outproj_dilations_0, groups = attention_21_outproj_groups_0, pad = attention_21_outproj_pad_0, pad_type = attention_21_outproj_pad_type_0, strides = attention_21_outproj_strides_0, weight = constexpr_blockwise_shift_scale_101, x = attention_21_reshaped)[name = string("attention_21_outproj")]; + tensor block_21_residual_1 = add(x = block_20_residual_2, y = attention_21_outproj)[name = string("block_21_residual_1")]; + tensor block_21_ffn_rmsnorm_abs = abs(x = block_21_residual_1)[name = string("block_21_ffn_rmsnorm_abs")]; + tensor block_21_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_21_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_21_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_21_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_21_ffn_rmsnorm_maxval = reduce_max(axes = block_21_ffn_rmsnorm_maxval_axes_0, keep_dims = block_21_ffn_rmsnorm_maxval_keep_dims_0, x = block_21_ffn_rmsnorm_abs)[name = string("block_21_ffn_rmsnorm_maxval")]; + fp16 block_21_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_21_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_21_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_21_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_21_ffn_rmsnorm_maxval_clipped = clip(alpha = block_21_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_21_ffn_rmsnorm_maxval_clipped_beta_0, x = block_21_ffn_rmsnorm_maxval)[name = string("block_21_ffn_rmsnorm_maxval_clipped")]; + tensor block_21_ffn_rmsnorm_scaled = real_div(x = block_21_residual_1, y = block_21_ffn_rmsnorm_maxval_clipped)[name = string("block_21_ffn_rmsnorm_scaled")]; + tensor block_21_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_21_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_21_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_21_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_21_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_21_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_21_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_21_ffn_rmsnorm_scaled)[name = string("block_21_ffn_rmsnorm_squared_sum")]; + fp16 block_21_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_21_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_21_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_21_ffn_rmsnorm_rsqrt_epsilon_0, x = block_21_ffn_rmsnorm_squared_sum)[name = string("block_21_ffn_rmsnorm_rsqrt")]; + fp16 block_21_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_21_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_21_ffn_rmsnorm_dim_scaled = mul(x = block_21_ffn_rmsnorm_scaled, y = block_21_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_21_ffn_rmsnorm_dim_scaled")]; + tensor block_21_ffn_rmsnorm_normalized = mul(x = block_21_ffn_rmsnorm_dim_scaled, y = block_21_ffn_rmsnorm_rsqrt)[name = string("block_21_ffn_rmsnorm_normalized")]; + tensor block_21_ffn_rmsnorm_y_0 = const()[name = string("block_21_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536600128)))]; + tensor block_21_ffn_rmsnorm = mul(x = block_21_ffn_rmsnorm_normalized, y = block_21_ffn_rmsnorm_y_0)[name = string("block_21_ffn_rmsnorm")]; + tensor block_21_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536601984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539870656))))[name = string("block_21_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_102 = constexpr_blockwise_shift_scale(data = block_21_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540026368))))[name = string("constexpr_blockwise_shift_scale_102")]; + tensor block_21_ffn_inproj_strides_0 = const()[name = string("block_21_ffn_inproj_strides_0"), val = tensor([1])]; + string block_21_ffn_inproj_pad_type_0 = const()[name = string("block_21_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_21_ffn_inproj_pad_0 = const()[name = string("block_21_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_21_ffn_inproj_dilations_0 = const()[name = string("block_21_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_21_ffn_inproj_groups_0 = const()[name = string("block_21_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_21_ffn_inproj = conv(dilations = block_21_ffn_inproj_dilations_0, groups = block_21_ffn_inproj_groups_0, pad = block_21_ffn_inproj_pad_0, pad_type = block_21_ffn_inproj_pad_type_0, strides = block_21_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_102, x = block_21_ffn_rmsnorm)[name = string("block_21_ffn_inproj")]; + tensor block_21_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540036160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543304832))))[name = string("block_21_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_103 = constexpr_blockwise_shift_scale(data = block_21_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543460544))))[name = string("constexpr_blockwise_shift_scale_103")]; + tensor block_21_ffn_g_strides_0 = const()[name = string("block_21_ffn_g_strides_0"), val = tensor([1])]; + string block_21_ffn_g_pad_type_0 = const()[name = string("block_21_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_21_ffn_g_pad_0 = const()[name = string("block_21_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_21_ffn_g_dilations_0 = const()[name = string("block_21_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_21_ffn_g_groups_0 = const()[name = string("block_21_ffn_g_groups_0"), val = int32(1)]; + tensor block_21_ffn_g = conv(dilations = block_21_ffn_g_dilations_0, groups = block_21_ffn_g_groups_0, pad = block_21_ffn_g_pad_0, pad_type = block_21_ffn_g_pad_type_0, strides = block_21_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_103, x = block_21_ffn_rmsnorm)[name = string("block_21_ffn_g")]; + tensor block_21_ffn_g_activation = silu(x = block_21_ffn_g)[name = string("block_21_ffn_g_activation")]; + tensor block_21_ffn_x_gated = mul(x = block_21_ffn_inproj, y = block_21_ffn_g_activation)[name = string("block_21_ffn_x_gated")]; + tensor block_21_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543470336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546739008))))[name = string("block_21_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_104 = constexpr_blockwise_shift_scale(data = block_21_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546767744))))[name = string("constexpr_blockwise_shift_scale_104")]; + tensor block_21_ffn_outproj_strides_0 = const()[name = string("block_21_ffn_outproj_strides_0"), val = tensor([1])]; + string block_21_ffn_outproj_pad_type_0 = const()[name = string("block_21_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_21_ffn_outproj_pad_0 = const()[name = string("block_21_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_21_ffn_outproj_dilations_0 = const()[name = string("block_21_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_21_ffn_outproj_groups_0 = const()[name = string("block_21_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_21_ffn_outproj = conv(dilations = block_21_ffn_outproj_dilations_0, groups = block_21_ffn_outproj_groups_0, pad = block_21_ffn_outproj_pad_0, pad_type = block_21_ffn_outproj_pad_type_0, strides = block_21_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_104, x = block_21_ffn_x_gated)[name = string("block_21_ffn_outproj")]; + tensor block_21_residual_2 = add(x = block_21_ffn_outproj, y = block_21_residual_1)[name = string("block_21_residual_2")]; + tensor block_22_attention_rmsnorm_abs = abs(x = block_21_residual_2)[name = string("block_22_attention_rmsnorm_abs")]; + tensor block_22_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_22_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_22_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_22_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_22_attention_rmsnorm_maxval = reduce_max(axes = block_22_attention_rmsnorm_maxval_axes_0, keep_dims = block_22_attention_rmsnorm_maxval_keep_dims_0, x = block_22_attention_rmsnorm_abs)[name = string("block_22_attention_rmsnorm_maxval")]; + fp16 block_22_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_22_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_22_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_22_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_22_attention_rmsnorm_maxval_clipped = clip(alpha = block_22_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_22_attention_rmsnorm_maxval_clipped_beta_0, x = block_22_attention_rmsnorm_maxval)[name = string("block_22_attention_rmsnorm_maxval_clipped")]; + tensor block_22_attention_rmsnorm_scaled = real_div(x = block_21_residual_2, y = block_22_attention_rmsnorm_maxval_clipped)[name = string("block_22_attention_rmsnorm_scaled")]; + tensor block_22_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_22_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_22_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_22_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_22_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_22_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_22_attention_rmsnorm_squared_sum_keep_dims_0, x = block_22_attention_rmsnorm_scaled)[name = string("block_22_attention_rmsnorm_squared_sum")]; + fp16 block_22_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_22_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_22_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_22_attention_rmsnorm_rsqrt_epsilon_0, x = block_22_attention_rmsnorm_squared_sum)[name = string("block_22_attention_rmsnorm_rsqrt")]; + fp16 block_22_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_22_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_22_attention_rmsnorm_dim_scaled = mul(x = block_22_attention_rmsnorm_scaled, y = block_22_attention_rmsnorm_dim_scaled_y_0)[name = string("block_22_attention_rmsnorm_dim_scaled")]; + tensor block_22_attention_rmsnorm_normalized = mul(x = block_22_attention_rmsnorm_dim_scaled, y = block_22_attention_rmsnorm_rsqrt)[name = string("block_22_attention_rmsnorm_normalized")]; + tensor block_22_attention_rmsnorm_y_0 = const()[name = string("block_22_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546769600)))]; + tensor block_22_attention_rmsnorm = mul(x = block_22_attention_rmsnorm_normalized, y = block_22_attention_rmsnorm_y_0)[name = string("block_22_attention_rmsnorm")]; + tensor attention_22_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546771456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547545664))))[name = string("attention_22_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_105 = constexpr_blockwise_shift_scale(data = attention_22_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547582592))))[name = string("constexpr_blockwise_shift_scale_105")]; + tensor attention_22_qkvproj_bias_0 = const()[name = string("attention_22_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547584960)))]; + tensor attention_22_qkvproj_strides_0 = const()[name = string("attention_22_qkvproj_strides_0"), val = tensor([1])]; + string attention_22_qkvproj_pad_type_0 = const()[name = string("attention_22_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_22_qkvproj_pad_0 = const()[name = string("attention_22_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_22_qkvproj_dilations_0 = const()[name = string("attention_22_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_22_qkvproj_groups_0 = const()[name = string("attention_22_qkvproj_groups_0"), val = int32(1)]; + tensor attention_22_qkvproj = conv(bias = attention_22_qkvproj_bias_0, dilations = attention_22_qkvproj_dilations_0, groups = attention_22_qkvproj_groups_0, pad = attention_22_qkvproj_pad_0, pad_type = attention_22_qkvproj_pad_type_0, strides = attention_22_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_105, x = block_22_attention_rmsnorm)[name = string("attention_22_qkvproj")]; + tensor attention_22_head_reshape_shape_0 = const()[name = string("attention_22_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_22_head_reshape = reshape(shape = attention_22_head_reshape_shape_0, x = attention_22_qkvproj)[name = string("attention_22_head_reshape")]; + tensor attention_22_head_transpose_perm_0 = const()[name = string("attention_22_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_22_split_qkv_heads_axis_0 = const()[name = string("attention_22_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_22_split_qkv_heads_split_sizes_0 = const()[name = string("attention_22_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_22_head_transpose = transpose(perm = attention_22_head_transpose_perm_0, x = attention_22_head_reshape)[name = string("transpose_4")]; + tensor attention_22_split_qkv_heads_0, tensor attention_22_split_qkv_heads_1, tensor attention_22_split_qkv_heads_2 = split(axis = attention_22_split_qkv_heads_axis_0, split_sizes = attention_22_split_qkv_heads_split_sizes_0, x = attention_22_head_transpose)[name = string("attention_22_split_qkv_heads")]; + tensor attention_22_q_rope_lhs_mult = mul(x = attention_22_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_22_q_rope_lhs_mult")]; + int32 attention_22_q_rotate_half_split_num_splits_0 = const()[name = string("attention_22_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_22_q_rotate_half_split_axis_0 = const()[name = string("attention_22_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_22_q_rotate_half_split_0, tensor attention_22_q_rotate_half_split_1 = split(axis = attention_22_q_rotate_half_split_axis_0, num_splits = attention_22_q_rotate_half_split_num_splits_0, x = attention_22_split_qkv_heads_0)[name = string("attention_22_q_rotate_half_split")]; + fp16 attention_22_q_rotate_half_neg_y_0 = const()[name = string("attention_22_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_22_q_rotate_half_neg = mul(x = attention_22_q_rotate_half_split_1, y = attention_22_q_rotate_half_neg_y_0)[name = string("attention_22_q_rotate_half_neg")]; + int32 attention_22_q_rotate_half_concat_axis_0 = const()[name = string("attention_22_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_22_q_rotate_half_concat_interleave_0 = const()[name = string("attention_22_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_22_q_rotate_half_concat = concat(axis = attention_22_q_rotate_half_concat_axis_0, interleave = attention_22_q_rotate_half_concat_interleave_0, values = (attention_22_q_rotate_half_neg, attention_22_q_rotate_half_split_0))[name = string("attention_22_q_rotate_half_concat")]; + tensor attention_22_q_rope_rhs_mult = mul(x = attention_22_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_22_q_rope_rhs_mult")]; + tensor attention_22_q_rope = add(x = attention_22_q_rope_lhs_mult, y = attention_22_q_rope_rhs_mult)[name = string("attention_22_q_rope")]; + tensor attention_22_k_rope_lhs_mult = mul(x = attention_22_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_22_k_rope_lhs_mult")]; + int32 attention_22_k_rotate_half_split_num_splits_0 = const()[name = string("attention_22_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_22_k_rotate_half_split_axis_0 = const()[name = string("attention_22_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_22_k_rotate_half_split_0, tensor attention_22_k_rotate_half_split_1 = split(axis = attention_22_k_rotate_half_split_axis_0, num_splits = attention_22_k_rotate_half_split_num_splits_0, x = attention_22_split_qkv_heads_1)[name = string("attention_22_k_rotate_half_split")]; + fp16 attention_22_k_rotate_half_neg_y_0 = const()[name = string("attention_22_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_22_k_rotate_half_neg = mul(x = attention_22_k_rotate_half_split_1, y = attention_22_k_rotate_half_neg_y_0)[name = string("attention_22_k_rotate_half_neg")]; + int32 attention_22_k_rotate_half_concat_axis_0 = const()[name = string("attention_22_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_22_k_rotate_half_concat_interleave_0 = const()[name = string("attention_22_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_22_k_rotate_half_concat = concat(axis = attention_22_k_rotate_half_concat_axis_0, interleave = attention_22_k_rotate_half_concat_interleave_0, values = (attention_22_k_rotate_half_neg, attention_22_k_rotate_half_split_0))[name = string("attention_22_k_rotate_half_concat")]; + tensor attention_22_k_rope_rhs_mult = mul(x = attention_22_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_22_k_rope_rhs_mult")]; + tensor attention_22_k_rope = add(x = attention_22_k_rope_lhs_mult, y = attention_22_k_rope_rhs_mult)[name = string("attention_22_k_rope")]; + int32 attention_22_q_splits_axis_0 = const()[name = string("attention_22_q_splits_axis_0"), val = int32(1)]; + int32 attention_22_q_splits_num_splits_0 = const()[name = string("attention_22_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_22_q_splits_0, tensor attention_22_q_splits_1 = split(axis = attention_22_q_splits_axis_0, num_splits = attention_22_q_splits_num_splits_0, x = attention_22_q_rope)[name = string("attention_22_q_splits")]; + tensor attention_22_update_begin_0_values0_0 = const()[name = string("attention_22_update_begin_0_values0_0"), val = tensor([22])]; + tensor attention_22_update_begin_0_values1_0 = const()[name = string("attention_22_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_22_update_begin_0_values3_0 = const()[name = string("attention_22_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_22_update_begin_0_axis_0 = const()[name = string("attention_22_update_begin_0_axis_0"), val = int32(0)]; + bool attention_22_update_begin_0_interleave_0 = const()[name = string("attention_22_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_22_update_begin_0 = concat(axis = attention_22_update_begin_0_axis_0, interleave = attention_22_update_begin_0_interleave_0, values = (attention_22_update_begin_0_values0_0, attention_22_update_begin_0_values1_0, query_pos1, attention_22_update_begin_0_values3_0))[name = string("attention_22_update_begin_0")]; + tensor attention_22_update_end_0_values0_0 = const()[name = string("attention_22_update_end_0_values0_0"), val = tensor([23])]; + tensor attention_22_update_end_0_values1_0 = const()[name = string("attention_22_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_22_update_end_0_values3_0 = const()[name = string("attention_22_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_22_update_end_0_axis_0 = const()[name = string("attention_22_update_end_0_axis_0"), val = int32(0)]; + bool attention_22_update_end_0_interleave_0 = const()[name = string("attention_22_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_22_update_end_0 = concat(axis = attention_22_update_end_0_axis_0, interleave = attention_22_update_end_0_interleave_0, values = (attention_22_update_end_0_values0_0, attention_22_update_end_0_values1_0, end_pos_0, attention_22_update_end_0_values3_0))[name = string("attention_22_update_end_0")]; + tensor attention_22_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_22_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_22_updated_key_cache_0 = slice_update(begin = attention_22_update_begin_0, end = attention_22_update_end_0, squeeze_mask = attention_22_updated_key_cache_0_squeeze_mask_0, update = attention_22_k_rope, x = coreml_update_state_42)[name = string("attention_22_updated_key_cache_0")]; + write_state(data = attention_22_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_44_write_state")]; + tensor coreml_update_state_44 = read_state(input = key_cache_state)[name = string("coreml_update_state_44")]; + tensor attention_22_key_cache_begin_0 = const()[name = string("attention_22_key_cache_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor attention_22_key_cache_end_0 = const()[name = string("attention_22_key_cache_end_0"), val = tensor([23, 2, 512, 64])]; + tensor attention_22_key_cache_squeeze_mask_0 = const()[name = string("attention_22_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_22_key_cache = slice_by_index(begin = attention_22_key_cache_begin_0, end = attention_22_key_cache_end_0, squeeze_mask = attention_22_key_cache_squeeze_mask_0, x = coreml_update_state_44)[name = string("attention_22_key_cache")]; + int32 attention_22_key_cache_head_axis_0 = const()[name = string("attention_22_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_22_key_cache_head_num_splits_0 = const()[name = string("attention_22_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_22_key_cache_head_0, tensor attention_22_key_cache_head_1 = split(axis = attention_22_key_cache_head_axis_0, num_splits = attention_22_key_cache_head_num_splits_0, x = attention_22_key_cache)[name = string("attention_22_key_cache_head")]; + tensor attention_22_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_22_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_22_updated_value_cache_0 = slice_update(begin = attention_22_update_begin_0, end = attention_22_update_end_0, squeeze_mask = attention_22_updated_value_cache_0_squeeze_mask_0, update = attention_22_split_qkv_heads_2, x = coreml_update_state_43)[name = string("attention_22_updated_value_cache_0")]; + write_state(data = attention_22_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_45_write_state")]; + tensor coreml_update_state_45 = read_state(input = value_cache_state)[name = string("coreml_update_state_45")]; + tensor attention_22_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_22_slice_current_layer_value_cache_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor attention_22_slice_current_layer_value_cache_end_0 = const()[name = string("attention_22_slice_current_layer_value_cache_end_0"), val = tensor([23, 2, 512, 64])]; + tensor attention_22_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_22_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_22_slice_current_layer_value_cache = slice_by_index(begin = attention_22_slice_current_layer_value_cache_begin_0, end = attention_22_slice_current_layer_value_cache_end_0, squeeze_mask = attention_22_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_45)[name = string("attention_22_slice_current_layer_value_cache")]; + int32 attention_22_slice_value_cache_heads_axis_0 = const()[name = string("attention_22_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_22_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_22_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_22_slice_value_cache_heads_0, tensor attention_22_slice_value_cache_heads_1 = split(axis = attention_22_slice_value_cache_heads_axis_0, num_splits = attention_22_slice_value_cache_heads_num_splits_0, x = attention_22_slice_current_layer_value_cache)[name = string("attention_22_slice_value_cache_heads")]; + bool attention_22_scores_0_transpose_y_0 = const()[name = string("attention_22_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_22_scores_0_transpose_x_0 = const()[name = string("attention_22_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_22_scores_0 = matmul(transpose_x = attention_22_scores_0_transpose_x_0, transpose_y = attention_22_scores_0_transpose_y_0, x = attention_22_key_cache_head_0, y = attention_22_q_splits_0)[name = string("attention_22_scores_0")]; + fp16 attention_22_scaled_scores_0_y_0 = const()[name = string("attention_22_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_22_scaled_scores_0 = mul(x = attention_22_scores_0, y = attention_22_scaled_scores_0_y_0)[name = string("attention_22_scaled_scores_0")]; + tensor attention_22_masked_scaled_scores_0 = add(x = attention_22_scaled_scores_0, y = transpose_0)[name = string("attention_22_masked_scaled_scores_0")]; + int32 softmax_44_axis_0 = const()[name = string("softmax_44_axis_0"), val = int32(-2)]; + tensor softmax_44 = softmax(axis = softmax_44_axis_0, x = attention_22_masked_scaled_scores_0)[name = string("softmax_44")]; + bool attention_22_attention_0_transpose_x_0 = const()[name = string("attention_22_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_22_attention_0_transpose_y_0 = const()[name = string("attention_22_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_22_attention_0 = matmul(transpose_x = attention_22_attention_0_transpose_x_0, transpose_y = attention_22_attention_0_transpose_y_0, x = softmax_44, y = attention_22_slice_value_cache_heads_0)[name = string("attention_22_attention_0")]; + bool attention_22_scores_1_transpose_y_0 = const()[name = string("attention_22_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_22_scores_1_transpose_x_0 = const()[name = string("attention_22_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_22_scores_1 = matmul(transpose_x = attention_22_scores_1_transpose_x_0, transpose_y = attention_22_scores_1_transpose_y_0, x = attention_22_key_cache_head_1, y = attention_22_q_splits_1)[name = string("attention_22_scores_1")]; + fp16 attention_22_scaled_scores_1_y_0 = const()[name = string("attention_22_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_22_scaled_scores_1 = mul(x = attention_22_scores_1, y = attention_22_scaled_scores_1_y_0)[name = string("attention_22_scaled_scores_1")]; + tensor attention_22_masked_scaled_scores_1 = add(x = attention_22_scaled_scores_1, y = transpose_0)[name = string("attention_22_masked_scaled_scores_1")]; + int32 softmax_45_axis_0 = const()[name = string("softmax_45_axis_0"), val = int32(-2)]; + tensor softmax_45 = softmax(axis = softmax_45_axis_0, x = attention_22_masked_scaled_scores_1)[name = string("softmax_45")]; + bool attention_22_attention_1_transpose_x_0 = const()[name = string("attention_22_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_22_attention_1_transpose_y_0 = const()[name = string("attention_22_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_22_attention_1 = matmul(transpose_x = attention_22_attention_1_transpose_x_0, transpose_y = attention_22_attention_1_transpose_y_0, x = softmax_45, y = attention_22_slice_value_cache_heads_1)[name = string("attention_22_attention_1")]; + int32 attention_22_concat_attention_all_heads_axis_0 = const()[name = string("attention_22_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_22_concat_attention_all_heads_interleave_0 = const()[name = string("attention_22_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_22_concat_attention_all_heads = concat(axis = attention_22_concat_attention_all_heads_axis_0, interleave = attention_22_concat_attention_all_heads_interleave_0, values = (attention_22_attention_0, attention_22_attention_1))[name = string("attention_22_concat_attention_all_heads")]; + tensor attention_22_channels_first_retransposed_perm_0 = const()[name = string("attention_22_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_22_reshaped_shape_0 = const()[name = string("attention_22_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_22_channels_first_retransposed = transpose(perm = attention_22_channels_first_retransposed_perm_0, x = attention_22_concat_attention_all_heads)[name = string("transpose_3")]; + tensor attention_22_reshaped = reshape(shape = attention_22_reshaped_shape_0, x = attention_22_channels_first_retransposed)[name = string("attention_22_reshaped")]; + tensor attention_22_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547587328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548189504))))[name = string("attention_22_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_106 = constexpr_blockwise_shift_scale(data = attention_22_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548218240))))[name = string("constexpr_blockwise_shift_scale_106")]; + tensor attention_22_outproj_strides_0 = const()[name = string("attention_22_outproj_strides_0"), val = tensor([1])]; + string attention_22_outproj_pad_type_0 = const()[name = string("attention_22_outproj_pad_type_0"), val = string("valid")]; + tensor attention_22_outproj_pad_0 = const()[name = string("attention_22_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_22_outproj_dilations_0 = const()[name = string("attention_22_outproj_dilations_0"), val = tensor([1])]; + int32 attention_22_outproj_groups_0 = const()[name = string("attention_22_outproj_groups_0"), val = int32(1)]; + tensor attention_22_outproj = conv(dilations = attention_22_outproj_dilations_0, groups = attention_22_outproj_groups_0, pad = attention_22_outproj_pad_0, pad_type = attention_22_outproj_pad_type_0, strides = attention_22_outproj_strides_0, weight = constexpr_blockwise_shift_scale_106, x = attention_22_reshaped)[name = string("attention_22_outproj")]; + tensor block_22_residual_1 = add(x = block_21_residual_2, y = attention_22_outproj)[name = string("block_22_residual_1")]; + tensor block_22_ffn_rmsnorm_abs = abs(x = block_22_residual_1)[name = string("block_22_ffn_rmsnorm_abs")]; + tensor block_22_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_22_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_22_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_22_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_22_ffn_rmsnorm_maxval = reduce_max(axes = block_22_ffn_rmsnorm_maxval_axes_0, keep_dims = block_22_ffn_rmsnorm_maxval_keep_dims_0, x = block_22_ffn_rmsnorm_abs)[name = string("block_22_ffn_rmsnorm_maxval")]; + fp16 block_22_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_22_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_22_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_22_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_22_ffn_rmsnorm_maxval_clipped = clip(alpha = block_22_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_22_ffn_rmsnorm_maxval_clipped_beta_0, x = block_22_ffn_rmsnorm_maxval)[name = string("block_22_ffn_rmsnorm_maxval_clipped")]; + tensor block_22_ffn_rmsnorm_scaled = real_div(x = block_22_residual_1, y = block_22_ffn_rmsnorm_maxval_clipped)[name = string("block_22_ffn_rmsnorm_scaled")]; + tensor block_22_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_22_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_22_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_22_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_22_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_22_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_22_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_22_ffn_rmsnorm_scaled)[name = string("block_22_ffn_rmsnorm_squared_sum")]; + fp16 block_22_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_22_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_22_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_22_ffn_rmsnorm_rsqrt_epsilon_0, x = block_22_ffn_rmsnorm_squared_sum)[name = string("block_22_ffn_rmsnorm_rsqrt")]; + fp16 block_22_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_22_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_22_ffn_rmsnorm_dim_scaled = mul(x = block_22_ffn_rmsnorm_scaled, y = block_22_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_22_ffn_rmsnorm_dim_scaled")]; + tensor block_22_ffn_rmsnorm_normalized = mul(x = block_22_ffn_rmsnorm_dim_scaled, y = block_22_ffn_rmsnorm_rsqrt)[name = string("block_22_ffn_rmsnorm_normalized")]; + tensor block_22_ffn_rmsnorm_y_0 = const()[name = string("block_22_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548220096)))]; + tensor block_22_ffn_rmsnorm = mul(x = block_22_ffn_rmsnorm_normalized, y = block_22_ffn_rmsnorm_y_0)[name = string("block_22_ffn_rmsnorm")]; + tensor block_22_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548221952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551490624))))[name = string("block_22_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_107 = constexpr_blockwise_shift_scale(data = block_22_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551646336))))[name = string("constexpr_blockwise_shift_scale_107")]; + tensor block_22_ffn_inproj_strides_0 = const()[name = string("block_22_ffn_inproj_strides_0"), val = tensor([1])]; + string block_22_ffn_inproj_pad_type_0 = const()[name = string("block_22_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_22_ffn_inproj_pad_0 = const()[name = string("block_22_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_22_ffn_inproj_dilations_0 = const()[name = string("block_22_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_22_ffn_inproj_groups_0 = const()[name = string("block_22_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_22_ffn_inproj = conv(dilations = block_22_ffn_inproj_dilations_0, groups = block_22_ffn_inproj_groups_0, pad = block_22_ffn_inproj_pad_0, pad_type = block_22_ffn_inproj_pad_type_0, strides = block_22_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_107, x = block_22_ffn_rmsnorm)[name = string("block_22_ffn_inproj")]; + tensor block_22_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551656128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554924800))))[name = string("block_22_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_108 = constexpr_blockwise_shift_scale(data = block_22_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555080512))))[name = string("constexpr_blockwise_shift_scale_108")]; + tensor block_22_ffn_g_strides_0 = const()[name = string("block_22_ffn_g_strides_0"), val = tensor([1])]; + string block_22_ffn_g_pad_type_0 = const()[name = string("block_22_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_22_ffn_g_pad_0 = const()[name = string("block_22_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_22_ffn_g_dilations_0 = const()[name = string("block_22_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_22_ffn_g_groups_0 = const()[name = string("block_22_ffn_g_groups_0"), val = int32(1)]; + tensor block_22_ffn_g = conv(dilations = block_22_ffn_g_dilations_0, groups = block_22_ffn_g_groups_0, pad = block_22_ffn_g_pad_0, pad_type = block_22_ffn_g_pad_type_0, strides = block_22_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_108, x = block_22_ffn_rmsnorm)[name = string("block_22_ffn_g")]; + tensor block_22_ffn_g_activation = silu(x = block_22_ffn_g)[name = string("block_22_ffn_g_activation")]; + tensor block_22_ffn_x_gated = mul(x = block_22_ffn_inproj, y = block_22_ffn_g_activation)[name = string("block_22_ffn_x_gated")]; + tensor block_22_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555090304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558358976))))[name = string("block_22_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_109 = constexpr_blockwise_shift_scale(data = block_22_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558387712))))[name = string("constexpr_blockwise_shift_scale_109")]; + tensor block_22_ffn_outproj_strides_0 = const()[name = string("block_22_ffn_outproj_strides_0"), val = tensor([1])]; + string block_22_ffn_outproj_pad_type_0 = const()[name = string("block_22_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_22_ffn_outproj_pad_0 = const()[name = string("block_22_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_22_ffn_outproj_dilations_0 = const()[name = string("block_22_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_22_ffn_outproj_groups_0 = const()[name = string("block_22_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_22_ffn_outproj = conv(dilations = block_22_ffn_outproj_dilations_0, groups = block_22_ffn_outproj_groups_0, pad = block_22_ffn_outproj_pad_0, pad_type = block_22_ffn_outproj_pad_type_0, strides = block_22_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_109, x = block_22_ffn_x_gated)[name = string("block_22_ffn_outproj")]; + tensor block_22_residual_2 = add(x = block_22_ffn_outproj, y = block_22_residual_1)[name = string("block_22_residual_2")]; + tensor block_23_attention_rmsnorm_abs = abs(x = block_22_residual_2)[name = string("block_23_attention_rmsnorm_abs")]; + tensor block_23_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_23_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_23_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_23_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_23_attention_rmsnorm_maxval = reduce_max(axes = block_23_attention_rmsnorm_maxval_axes_0, keep_dims = block_23_attention_rmsnorm_maxval_keep_dims_0, x = block_23_attention_rmsnorm_abs)[name = string("block_23_attention_rmsnorm_maxval")]; + fp16 block_23_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_23_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_23_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_23_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_23_attention_rmsnorm_maxval_clipped = clip(alpha = block_23_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_23_attention_rmsnorm_maxval_clipped_beta_0, x = block_23_attention_rmsnorm_maxval)[name = string("block_23_attention_rmsnorm_maxval_clipped")]; + tensor block_23_attention_rmsnorm_scaled = real_div(x = block_22_residual_2, y = block_23_attention_rmsnorm_maxval_clipped)[name = string("block_23_attention_rmsnorm_scaled")]; + tensor block_23_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_23_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_23_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_23_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_23_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_23_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_23_attention_rmsnorm_squared_sum_keep_dims_0, x = block_23_attention_rmsnorm_scaled)[name = string("block_23_attention_rmsnorm_squared_sum")]; + fp16 block_23_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_23_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_23_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_23_attention_rmsnorm_rsqrt_epsilon_0, x = block_23_attention_rmsnorm_squared_sum)[name = string("block_23_attention_rmsnorm_rsqrt")]; + fp16 block_23_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_23_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_23_attention_rmsnorm_dim_scaled = mul(x = block_23_attention_rmsnorm_scaled, y = block_23_attention_rmsnorm_dim_scaled_y_0)[name = string("block_23_attention_rmsnorm_dim_scaled")]; + tensor block_23_attention_rmsnorm_normalized = mul(x = block_23_attention_rmsnorm_dim_scaled, y = block_23_attention_rmsnorm_rsqrt)[name = string("block_23_attention_rmsnorm_normalized")]; + tensor block_23_attention_rmsnorm_y_0 = const()[name = string("block_23_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558389568)))]; + tensor block_23_attention_rmsnorm = mul(x = block_23_attention_rmsnorm_normalized, y = block_23_attention_rmsnorm_y_0)[name = string("block_23_attention_rmsnorm")]; + tensor attention_23_qkvproj_weight_0 = const()[name = string("attention_23_qkvproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558391424)))]; + tensor attention_23_qkvproj_bias_0 = const()[name = string("attention_23_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560455872)))]; + tensor attention_23_qkvproj_strides_0 = const()[name = string("attention_23_qkvproj_strides_0"), val = tensor([1])]; + string attention_23_qkvproj_pad_type_0 = const()[name = string("attention_23_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_23_qkvproj_pad_0 = const()[name = string("attention_23_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_23_qkvproj_dilations_0 = const()[name = string("attention_23_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_23_qkvproj_groups_0 = const()[name = string("attention_23_qkvproj_groups_0"), val = int32(1)]; + tensor attention_23_qkvproj = conv(bias = attention_23_qkvproj_bias_0, dilations = attention_23_qkvproj_dilations_0, groups = attention_23_qkvproj_groups_0, pad = attention_23_qkvproj_pad_0, pad_type = attention_23_qkvproj_pad_type_0, strides = attention_23_qkvproj_strides_0, weight = attention_23_qkvproj_weight_0, x = block_23_attention_rmsnorm)[name = string("attention_23_qkvproj")]; + tensor attention_23_head_reshape_shape_0 = const()[name = string("attention_23_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; + tensor attention_23_head_reshape = reshape(shape = attention_23_head_reshape_shape_0, x = attention_23_qkvproj)[name = string("attention_23_head_reshape")]; + tensor attention_23_head_transpose_perm_0 = const()[name = string("attention_23_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_23_split_qkv_heads_axis_0 = const()[name = string("attention_23_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_23_split_qkv_heads_split_sizes_0 = const()[name = string("attention_23_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_23_head_transpose = transpose(perm = attention_23_head_transpose_perm_0, x = attention_23_head_reshape)[name = string("transpose_2")]; + tensor attention_23_split_qkv_heads_0, tensor attention_23_split_qkv_heads_1, tensor attention_23_split_qkv_heads_2 = split(axis = attention_23_split_qkv_heads_axis_0, split_sizes = attention_23_split_qkv_heads_split_sizes_0, x = attention_23_head_transpose)[name = string("attention_23_split_qkv_heads")]; + tensor attention_23_q_rope_lhs_mult = mul(x = attention_23_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_23_q_rope_lhs_mult")]; + int32 attention_23_q_rotate_half_split_num_splits_0 = const()[name = string("attention_23_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_23_q_rotate_half_split_axis_0 = const()[name = string("attention_23_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_23_q_rotate_half_split_0, tensor attention_23_q_rotate_half_split_1 = split(axis = attention_23_q_rotate_half_split_axis_0, num_splits = attention_23_q_rotate_half_split_num_splits_0, x = attention_23_split_qkv_heads_0)[name = string("attention_23_q_rotate_half_split")]; + fp16 attention_23_q_rotate_half_neg_y_0 = const()[name = string("attention_23_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_23_q_rotate_half_neg = mul(x = attention_23_q_rotate_half_split_1, y = attention_23_q_rotate_half_neg_y_0)[name = string("attention_23_q_rotate_half_neg")]; + int32 attention_23_q_rotate_half_concat_axis_0 = const()[name = string("attention_23_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_23_q_rotate_half_concat_interleave_0 = const()[name = string("attention_23_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_23_q_rotate_half_concat = concat(axis = attention_23_q_rotate_half_concat_axis_0, interleave = attention_23_q_rotate_half_concat_interleave_0, values = (attention_23_q_rotate_half_neg, attention_23_q_rotate_half_split_0))[name = string("attention_23_q_rotate_half_concat")]; + tensor attention_23_q_rope_rhs_mult = mul(x = attention_23_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_23_q_rope_rhs_mult")]; + tensor attention_23_q_rope = add(x = attention_23_q_rope_lhs_mult, y = attention_23_q_rope_rhs_mult)[name = string("attention_23_q_rope")]; + tensor attention_23_k_rope_lhs_mult = mul(x = attention_23_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_23_k_rope_lhs_mult")]; + int32 attention_23_k_rotate_half_split_num_splits_0 = const()[name = string("attention_23_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_23_k_rotate_half_split_axis_0 = const()[name = string("attention_23_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_23_k_rotate_half_split_0, tensor attention_23_k_rotate_half_split_1 = split(axis = attention_23_k_rotate_half_split_axis_0, num_splits = attention_23_k_rotate_half_split_num_splits_0, x = attention_23_split_qkv_heads_1)[name = string("attention_23_k_rotate_half_split")]; + fp16 attention_23_k_rotate_half_neg_y_0 = const()[name = string("attention_23_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_23_k_rotate_half_neg = mul(x = attention_23_k_rotate_half_split_1, y = attention_23_k_rotate_half_neg_y_0)[name = string("attention_23_k_rotate_half_neg")]; + int32 attention_23_k_rotate_half_concat_axis_0 = const()[name = string("attention_23_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_23_k_rotate_half_concat_interleave_0 = const()[name = string("attention_23_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_23_k_rotate_half_concat = concat(axis = attention_23_k_rotate_half_concat_axis_0, interleave = attention_23_k_rotate_half_concat_interleave_0, values = (attention_23_k_rotate_half_neg, attention_23_k_rotate_half_split_0))[name = string("attention_23_k_rotate_half_concat")]; + tensor attention_23_k_rope_rhs_mult = mul(x = attention_23_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_23_k_rope_rhs_mult")]; + tensor attention_23_k_rope = add(x = attention_23_k_rope_lhs_mult, y = attention_23_k_rope_rhs_mult)[name = string("attention_23_k_rope")]; + int32 attention_23_q_splits_axis_0 = const()[name = string("attention_23_q_splits_axis_0"), val = int32(1)]; + int32 attention_23_q_splits_num_splits_0 = const()[name = string("attention_23_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_23_q_splits_0, tensor attention_23_q_splits_1 = split(axis = attention_23_q_splits_axis_0, num_splits = attention_23_q_splits_num_splits_0, x = attention_23_q_rope)[name = string("attention_23_q_splits")]; + tensor attention_23_update_begin_0_values0_0 = const()[name = string("attention_23_update_begin_0_values0_0"), val = tensor([23])]; + tensor attention_23_update_begin_0_values1_0 = const()[name = string("attention_23_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_23_update_begin_0_values3_0 = const()[name = string("attention_23_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_23_update_begin_0_axis_0 = const()[name = string("attention_23_update_begin_0_axis_0"), val = int32(0)]; + bool attention_23_update_begin_0_interleave_0 = const()[name = string("attention_23_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_23_update_begin_0 = concat(axis = attention_23_update_begin_0_axis_0, interleave = attention_23_update_begin_0_interleave_0, values = (attention_23_update_begin_0_values0_0, attention_23_update_begin_0_values1_0, query_pos1, attention_23_update_begin_0_values3_0))[name = string("attention_23_update_begin_0")]; + tensor attention_23_update_end_0_values0_0 = const()[name = string("attention_23_update_end_0_values0_0"), val = tensor([24])]; + tensor attention_23_update_end_0_values1_0 = const()[name = string("attention_23_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_23_update_end_0_values3_0 = const()[name = string("attention_23_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_23_update_end_0_axis_0 = const()[name = string("attention_23_update_end_0_axis_0"), val = int32(0)]; + bool attention_23_update_end_0_interleave_0 = const()[name = string("attention_23_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_23_update_end_0 = concat(axis = attention_23_update_end_0_axis_0, interleave = attention_23_update_end_0_interleave_0, values = (attention_23_update_end_0_values0_0, attention_23_update_end_0_values1_0, end_pos_0, attention_23_update_end_0_values3_0))[name = string("attention_23_update_end_0")]; + tensor attention_23_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_23_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_23_updated_key_cache_0 = slice_update(begin = attention_23_update_begin_0, end = attention_23_update_end_0, squeeze_mask = attention_23_updated_key_cache_0_squeeze_mask_0, update = attention_23_k_rope, x = coreml_update_state_44)[name = string("attention_23_updated_key_cache_0")]; + write_state(data = attention_23_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_46_write_state")]; + tensor coreml_update_state_46 = read_state(input = key_cache_state)[name = string("coreml_update_state_46")]; + tensor attention_23_key_cache_begin_0 = const()[name = string("attention_23_key_cache_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor attention_23_key_cache_end_0 = const()[name = string("attention_23_key_cache_end_0"), val = tensor([24, 2, 512, 64])]; + tensor attention_23_key_cache_squeeze_mask_0 = const()[name = string("attention_23_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_23_key_cache = slice_by_index(begin = attention_23_key_cache_begin_0, end = attention_23_key_cache_end_0, squeeze_mask = attention_23_key_cache_squeeze_mask_0, x = coreml_update_state_46)[name = string("attention_23_key_cache")]; + int32 attention_23_key_cache_head_axis_0 = const()[name = string("attention_23_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_23_key_cache_head_num_splits_0 = const()[name = string("attention_23_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_23_key_cache_head_0, tensor attention_23_key_cache_head_1 = split(axis = attention_23_key_cache_head_axis_0, num_splits = attention_23_key_cache_head_num_splits_0, x = attention_23_key_cache)[name = string("attention_23_key_cache_head")]; + tensor attention_23_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_23_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_23_updated_value_cache_0 = slice_update(begin = attention_23_update_begin_0, end = attention_23_update_end_0, squeeze_mask = attention_23_updated_value_cache_0_squeeze_mask_0, update = attention_23_split_qkv_heads_2, x = coreml_update_state_45)[name = string("attention_23_updated_value_cache_0")]; + write_state(data = attention_23_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_47_write_state")]; + tensor coreml_update_state_47 = read_state(input = value_cache_state)[name = string("coreml_update_state_47")]; + tensor attention_23_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_23_slice_current_layer_value_cache_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor attention_23_slice_current_layer_value_cache_end_0 = const()[name = string("attention_23_slice_current_layer_value_cache_end_0"), val = tensor([24, 2, 512, 64])]; + tensor attention_23_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_23_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_23_slice_current_layer_value_cache = slice_by_index(begin = attention_23_slice_current_layer_value_cache_begin_0, end = attention_23_slice_current_layer_value_cache_end_0, squeeze_mask = attention_23_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_47)[name = string("attention_23_slice_current_layer_value_cache")]; + int32 attention_23_slice_value_cache_heads_axis_0 = const()[name = string("attention_23_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_23_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_23_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_23_slice_value_cache_heads_0, tensor attention_23_slice_value_cache_heads_1 = split(axis = attention_23_slice_value_cache_heads_axis_0, num_splits = attention_23_slice_value_cache_heads_num_splits_0, x = attention_23_slice_current_layer_value_cache)[name = string("attention_23_slice_value_cache_heads")]; + bool attention_23_scores_0_transpose_y_0 = const()[name = string("attention_23_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_23_scores_0_transpose_x_0 = const()[name = string("attention_23_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_23_scores_0 = matmul(transpose_x = attention_23_scores_0_transpose_x_0, transpose_y = attention_23_scores_0_transpose_y_0, x = attention_23_key_cache_head_0, y = attention_23_q_splits_0)[name = string("attention_23_scores_0")]; + fp16 attention_23_scaled_scores_0_y_0 = const()[name = string("attention_23_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_23_scaled_scores_0 = mul(x = attention_23_scores_0, y = attention_23_scaled_scores_0_y_0)[name = string("attention_23_scaled_scores_0")]; + tensor attention_23_masked_scaled_scores_0 = add(x = attention_23_scaled_scores_0, y = transpose_0)[name = string("attention_23_masked_scaled_scores_0")]; + int32 softmax_46_axis_0 = const()[name = string("softmax_46_axis_0"), val = int32(-2)]; + tensor softmax_46 = softmax(axis = softmax_46_axis_0, x = attention_23_masked_scaled_scores_0)[name = string("softmax_46")]; + bool attention_23_attention_0_transpose_x_0 = const()[name = string("attention_23_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_23_attention_0_transpose_y_0 = const()[name = string("attention_23_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_23_attention_0 = matmul(transpose_x = attention_23_attention_0_transpose_x_0, transpose_y = attention_23_attention_0_transpose_y_0, x = softmax_46, y = attention_23_slice_value_cache_heads_0)[name = string("attention_23_attention_0")]; + bool attention_23_scores_1_transpose_y_0 = const()[name = string("attention_23_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_23_scores_1_transpose_x_0 = const()[name = string("attention_23_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_23_scores_1 = matmul(transpose_x = attention_23_scores_1_transpose_x_0, transpose_y = attention_23_scores_1_transpose_y_0, x = attention_23_key_cache_head_1, y = attention_23_q_splits_1)[name = string("attention_23_scores_1")]; + fp16 attention_23_scaled_scores_1_y_0 = const()[name = string("attention_23_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_23_scaled_scores_1 = mul(x = attention_23_scores_1, y = attention_23_scaled_scores_1_y_0)[name = string("attention_23_scaled_scores_1")]; + tensor attention_23_masked_scaled_scores_1 = add(x = attention_23_scaled_scores_1, y = transpose_0)[name = string("attention_23_masked_scaled_scores_1")]; + int32 softmax_47_axis_0 = const()[name = string("softmax_47_axis_0"), val = int32(-2)]; + tensor softmax_47 = softmax(axis = softmax_47_axis_0, x = attention_23_masked_scaled_scores_1)[name = string("softmax_47")]; + bool attention_23_attention_1_transpose_x_0 = const()[name = string("attention_23_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_23_attention_1_transpose_y_0 = const()[name = string("attention_23_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_23_attention_1 = matmul(transpose_x = attention_23_attention_1_transpose_x_0, transpose_y = attention_23_attention_1_transpose_y_0, x = softmax_47, y = attention_23_slice_value_cache_heads_1)[name = string("attention_23_attention_1")]; + int32 attention_23_concat_attention_all_heads_axis_0 = const()[name = string("attention_23_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_23_concat_attention_all_heads_interleave_0 = const()[name = string("attention_23_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_23_concat_attention_all_heads = concat(axis = attention_23_concat_attention_all_heads_axis_0, interleave = attention_23_concat_attention_all_heads_interleave_0, values = (attention_23_attention_0, attention_23_attention_1))[name = string("attention_23_concat_attention_all_heads")]; + tensor attention_23_channels_first_retransposed_perm_0 = const()[name = string("attention_23_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_23_reshaped_shape_0 = const()[name = string("attention_23_reshaped_shape_0"), val = tensor([1, 896, 1])]; + tensor attention_23_channels_first_retransposed = transpose(perm = attention_23_channels_first_retransposed_perm_0, x = attention_23_concat_attention_all_heads)[name = string("transpose_1")]; + tensor attention_23_reshaped = reshape(shape = attention_23_reshaped_shape_0, x = attention_23_channels_first_retransposed)[name = string("attention_23_reshaped")]; + tensor attention_23_outproj_weight_0 = const()[name = string("attention_23_outproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560458240)))]; + tensor attention_23_outproj_strides_0 = const()[name = string("attention_23_outproj_strides_0"), val = tensor([1])]; + string attention_23_outproj_pad_type_0 = const()[name = string("attention_23_outproj_pad_type_0"), val = string("valid")]; + tensor attention_23_outproj_pad_0 = const()[name = string("attention_23_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_23_outproj_dilations_0 = const()[name = string("attention_23_outproj_dilations_0"), val = tensor([1])]; + int32 attention_23_outproj_groups_0 = const()[name = string("attention_23_outproj_groups_0"), val = int32(1)]; + tensor attention_23_outproj = conv(dilations = attention_23_outproj_dilations_0, groups = attention_23_outproj_groups_0, pad = attention_23_outproj_pad_0, pad_type = attention_23_outproj_pad_type_0, strides = attention_23_outproj_strides_0, weight = attention_23_outproj_weight_0, x = attention_23_reshaped)[name = string("attention_23_outproj")]; + tensor block_23_residual_1 = add(x = block_22_residual_2, y = attention_23_outproj)[name = string("block_23_residual_1")]; + tensor block_23_ffn_rmsnorm_abs = abs(x = block_23_residual_1)[name = string("block_23_ffn_rmsnorm_abs")]; + tensor block_23_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_23_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_23_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_23_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_23_ffn_rmsnorm_maxval = reduce_max(axes = block_23_ffn_rmsnorm_maxval_axes_0, keep_dims = block_23_ffn_rmsnorm_maxval_keep_dims_0, x = block_23_ffn_rmsnorm_abs)[name = string("block_23_ffn_rmsnorm_maxval")]; + fp16 block_23_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_23_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_23_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_23_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_23_ffn_rmsnorm_maxval_clipped = clip(alpha = block_23_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_23_ffn_rmsnorm_maxval_clipped_beta_0, x = block_23_ffn_rmsnorm_maxval)[name = string("block_23_ffn_rmsnorm_maxval_clipped")]; + tensor block_23_ffn_rmsnorm_scaled = real_div(x = block_23_residual_1, y = block_23_ffn_rmsnorm_maxval_clipped)[name = string("block_23_ffn_rmsnorm_scaled")]; + tensor block_23_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_23_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_23_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_23_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_23_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_23_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_23_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_23_ffn_rmsnorm_scaled)[name = string("block_23_ffn_rmsnorm_squared_sum")]; + fp16 block_23_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_23_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_23_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_23_ffn_rmsnorm_rsqrt_epsilon_0, x = block_23_ffn_rmsnorm_squared_sum)[name = string("block_23_ffn_rmsnorm_rsqrt")]; + fp16 block_23_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_23_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_23_ffn_rmsnorm_dim_scaled = mul(x = block_23_ffn_rmsnorm_scaled, y = block_23_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_23_ffn_rmsnorm_dim_scaled")]; + tensor block_23_ffn_rmsnorm_normalized = mul(x = block_23_ffn_rmsnorm_dim_scaled, y = block_23_ffn_rmsnorm_rsqrt)[name = string("block_23_ffn_rmsnorm_normalized")]; + tensor block_23_ffn_rmsnorm_y_0 = const()[name = string("block_23_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562063936)))]; + tensor block_23_ffn_rmsnorm = mul(x = block_23_ffn_rmsnorm_normalized, y = block_23_ffn_rmsnorm_y_0)[name = string("block_23_ffn_rmsnorm")]; + tensor block_23_ffn_inproj_weight_0 = const()[name = string("block_23_ffn_inproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562065792)))]; + tensor block_23_ffn_inproj_strides_0 = const()[name = string("block_23_ffn_inproj_strides_0"), val = tensor([1])]; + string block_23_ffn_inproj_pad_type_0 = const()[name = string("block_23_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_23_ffn_inproj_pad_0 = const()[name = string("block_23_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_23_ffn_inproj_dilations_0 = const()[name = string("block_23_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_23_ffn_inproj_groups_0 = const()[name = string("block_23_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_23_ffn_inproj = conv(dilations = block_23_ffn_inproj_dilations_0, groups = block_23_ffn_inproj_groups_0, pad = block_23_ffn_inproj_pad_0, pad_type = block_23_ffn_inproj_pad_type_0, strides = block_23_ffn_inproj_strides_0, weight = block_23_ffn_inproj_weight_0, x = block_23_ffn_rmsnorm)[name = string("block_23_ffn_inproj")]; + tensor block_23_ffn_g_weight_0 = const()[name = string("block_23_ffn_g_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(570782144)))]; + tensor block_23_ffn_g_strides_0 = const()[name = string("block_23_ffn_g_strides_0"), val = tensor([1])]; + string block_23_ffn_g_pad_type_0 = const()[name = string("block_23_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_23_ffn_g_pad_0 = const()[name = string("block_23_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_23_ffn_g_dilations_0 = const()[name = string("block_23_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_23_ffn_g_groups_0 = const()[name = string("block_23_ffn_g_groups_0"), val = int32(1)]; + tensor block_23_ffn_g = conv(dilations = block_23_ffn_g_dilations_0, groups = block_23_ffn_g_groups_0, pad = block_23_ffn_g_pad_0, pad_type = block_23_ffn_g_pad_type_0, strides = block_23_ffn_g_strides_0, weight = block_23_ffn_g_weight_0, x = block_23_ffn_rmsnorm)[name = string("block_23_ffn_g")]; + tensor block_23_ffn_g_activation = silu(x = block_23_ffn_g)[name = string("block_23_ffn_g_activation")]; + tensor block_23_ffn_x_gated = mul(x = block_23_ffn_inproj, y = block_23_ffn_g_activation)[name = string("block_23_ffn_x_gated")]; + tensor block_23_ffn_outproj_weight_0 = const()[name = string("block_23_ffn_outproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(579498496)))]; + tensor block_23_ffn_outproj_strides_0 = const()[name = string("block_23_ffn_outproj_strides_0"), val = tensor([1])]; + string block_23_ffn_outproj_pad_type_0 = const()[name = string("block_23_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_23_ffn_outproj_pad_0 = const()[name = string("block_23_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_23_ffn_outproj_dilations_0 = const()[name = string("block_23_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_23_ffn_outproj_groups_0 = const()[name = string("block_23_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_23_ffn_outproj = conv(dilations = block_23_ffn_outproj_dilations_0, groups = block_23_ffn_outproj_groups_0, pad = block_23_ffn_outproj_pad_0, pad_type = block_23_ffn_outproj_pad_type_0, strides = block_23_ffn_outproj_strides_0, weight = block_23_ffn_outproj_weight_0, x = block_23_ffn_x_gated)[name = string("block_23_ffn_outproj")]; + tensor block_23_residual_2 = add(x = block_23_ffn_outproj, y = block_23_residual_1)[name = string("block_23_residual_2")]; + tensor final_norm_rmsnorm_abs = abs(x = block_23_residual_2)[name = string("final_norm_rmsnorm_abs")]; + tensor final_norm_rmsnorm_maxval_axes_0 = const()[name = string("final_norm_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool final_norm_rmsnorm_maxval_keep_dims_0 = const()[name = string("final_norm_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor final_norm_rmsnorm_maxval = reduce_max(axes = final_norm_rmsnorm_maxval_axes_0, keep_dims = final_norm_rmsnorm_maxval_keep_dims_0, x = final_norm_rmsnorm_abs)[name = string("final_norm_rmsnorm_maxval")]; + fp16 final_norm_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("final_norm_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 final_norm_rmsnorm_maxval_clipped_beta_0 = const()[name = string("final_norm_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor final_norm_rmsnorm_maxval_clipped = clip(alpha = final_norm_rmsnorm_maxval_clipped_alpha_0, beta = final_norm_rmsnorm_maxval_clipped_beta_0, x = final_norm_rmsnorm_maxval)[name = string("final_norm_rmsnorm_maxval_clipped")]; + tensor final_norm_rmsnorm_scaled = real_div(x = block_23_residual_2, y = final_norm_rmsnorm_maxval_clipped)[name = string("final_norm_rmsnorm_scaled")]; + tensor final_norm_rmsnorm_squared_sum_axes_0 = const()[name = string("final_norm_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool final_norm_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("final_norm_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor final_norm_rmsnorm_squared_sum = reduce_sum_square(axes = final_norm_rmsnorm_squared_sum_axes_0, keep_dims = final_norm_rmsnorm_squared_sum_keep_dims_0, x = final_norm_rmsnorm_scaled)[name = string("final_norm_rmsnorm_squared_sum")]; + fp16 final_norm_rmsnorm_rsqrt_epsilon_0 = const()[name = string("final_norm_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor final_norm_rmsnorm_rsqrt = rsqrt(epsilon = final_norm_rmsnorm_rsqrt_epsilon_0, x = final_norm_rmsnorm_squared_sum)[name = string("final_norm_rmsnorm_rsqrt")]; + fp16 final_norm_rmsnorm_dim_scaled_y_0 = const()[name = string("final_norm_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor final_norm_rmsnorm_dim_scaled = mul(x = final_norm_rmsnorm_scaled, y = final_norm_rmsnorm_dim_scaled_y_0)[name = string("final_norm_rmsnorm_dim_scaled")]; + tensor final_norm_rmsnorm_normalized = mul(x = final_norm_rmsnorm_dim_scaled, y = final_norm_rmsnorm_rsqrt)[name = string("final_norm_rmsnorm_normalized")]; + tensor final_norm_rmsnorm_y_0 = const()[name = string("final_norm_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588214848)))]; + tensor final_norm_rmsnorm = mul(x = final_norm_rmsnorm_normalized, y = final_norm_rmsnorm_y_0)[name = string("final_norm_rmsnorm")]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588216704)))]; + tensor logits_0_strides_0 = const()[name = string("logits_0_strides_0"), val = tensor([1])]; + string logits_0_pad_type_0 = const()[name = string("logits_0_pad_type_0"), val = string("valid")]; + tensor logits_0_pad_0 = const()[name = string("logits_0_pad_0"), val = tensor([0, 0])]; + tensor logits_0_dilations_0 = const()[name = string("logits_0_dilations_0"), val = tensor([1])]; + int32 logits_0_groups_0 = const()[name = string("logits_0_groups_0"), val = int32(1)]; + tensor logits_0 = conv(dilations = logits_0_dilations_0, groups = logits_0_groups_0, pad = logits_0_pad_0, pad_type = logits_0_pad_type_0, strides = logits_0_strides_0, weight = expand_dims_1, x = final_norm_rmsnorm)[name = string("logits_0")]; + tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617576896)))]; + tensor logits_1_strides_0 = const()[name = string("logits_1_strides_0"), val = tensor([1])]; + string logits_1_pad_type_0 = const()[name = string("logits_1_pad_type_0"), val = string("valid")]; + tensor logits_1_pad_0 = const()[name = string("logits_1_pad_0"), val = tensor([0, 0])]; + tensor logits_1_dilations_0 = const()[name = string("logits_1_dilations_0"), val = tensor([1])]; + int32 logits_1_groups_0 = const()[name = string("logits_1_groups_0"), val = int32(1)]; + tensor logits_1 = conv(dilations = logits_1_dilations_0, groups = logits_1_groups_0, pad = logits_1_pad_0, pad_type = logits_1_pad_type_0, strides = logits_1_strides_0, weight = expand_dims_2, x = final_norm_rmsnorm)[name = string("logits_1")]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646937088)))]; + tensor logits_2_strides_0 = const()[name = string("logits_2_strides_0"), val = tensor([1])]; + string logits_2_pad_type_0 = const()[name = string("logits_2_pad_type_0"), val = string("valid")]; + tensor logits_2_pad_0 = const()[name = string("logits_2_pad_0"), val = tensor([0, 0])]; + tensor logits_2_dilations_0 = const()[name = string("logits_2_dilations_0"), val = tensor([1])]; + int32 logits_2_groups_0 = const()[name = string("logits_2_groups_0"), val = int32(1)]; + tensor logits_2 = conv(dilations = logits_2_dilations_0, groups = logits_2_groups_0, pad = logits_2_pad_0, pad_type = logits_2_pad_type_0, strides = logits_2_strides_0, weight = expand_dims_3, x = final_norm_rmsnorm)[name = string("logits_2")]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676297280)))]; + tensor logits_3_strides_0 = const()[name = string("logits_3_strides_0"), val = tensor([1])]; + string logits_3_pad_type_0 = const()[name = string("logits_3_pad_type_0"), val = string("valid")]; + tensor logits_3_pad_0 = const()[name = string("logits_3_pad_0"), val = tensor([0, 0])]; + tensor logits_3_dilations_0 = const()[name = string("logits_3_dilations_0"), val = tensor([1])]; + int32 logits_3_groups_0 = const()[name = string("logits_3_groups_0"), val = int32(1)]; + tensor logits_3 = conv(dilations = logits_3_dilations_0, groups = logits_3_groups_0, pad = logits_3_pad_0, pad_type = logits_3_pad_type_0, strides = logits_3_strides_0, weight = expand_dims_4, x = final_norm_rmsnorm)[name = string("logits_3")]; + tensor expand_dims_5 = const()[name = string("expand_dims_5"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(705657472)))]; + tensor logits_4_strides_0 = const()[name = string("logits_4_strides_0"), val = tensor([1])]; + string logits_4_pad_type_0 = const()[name = string("logits_4_pad_type_0"), val = string("valid")]; + tensor logits_4_pad_0 = const()[name = string("logits_4_pad_0"), val = tensor([0, 0])]; + tensor logits_4_dilations_0 = const()[name = string("logits_4_dilations_0"), val = tensor([1])]; + int32 logits_4_groups_0 = const()[name = string("logits_4_groups_0"), val = int32(1)]; + tensor logits_4 = conv(dilations = logits_4_dilations_0, groups = logits_4_groups_0, pad = logits_4_pad_0, pad_type = logits_4_pad_type_0, strides = logits_4_strides_0, weight = expand_dims_5, x = final_norm_rmsnorm)[name = string("logits_4")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735017664)))]; + tensor logits_5_strides_0 = const()[name = string("logits_5_strides_0"), val = tensor([1])]; + string logits_5_pad_type_0 = const()[name = string("logits_5_pad_type_0"), val = string("valid")]; + tensor logits_5_pad_0 = const()[name = string("logits_5_pad_0"), val = tensor([0, 0])]; + tensor logits_5_dilations_0 = const()[name = string("logits_5_dilations_0"), val = tensor([1])]; + int32 logits_5_groups_0 = const()[name = string("logits_5_groups_0"), val = int32(1)]; + tensor logits_5 = conv(dilations = logits_5_dilations_0, groups = logits_5_groups_0, pad = logits_5_pad_0, pad_type = logits_5_pad_type_0, strides = logits_5_strides_0, weight = expand_dims_6, x = final_norm_rmsnorm)[name = string("logits_5")]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764377856)))]; + tensor logits_6_strides_0 = const()[name = string("logits_6_strides_0"), val = tensor([1])]; + string logits_6_pad_type_0 = const()[name = string("logits_6_pad_type_0"), val = string("valid")]; + tensor logits_6_pad_0 = const()[name = string("logits_6_pad_0"), val = tensor([0, 0])]; + tensor logits_6_dilations_0 = const()[name = string("logits_6_dilations_0"), val = tensor([1])]; + int32 logits_6_groups_0 = const()[name = string("logits_6_groups_0"), val = int32(1)]; + tensor logits_6 = conv(dilations = logits_6_dilations_0, groups = logits_6_groups_0, pad = logits_6_pad_0, pad_type = logits_6_pad_type_0, strides = logits_6_strides_0, weight = expand_dims_7, x = final_norm_rmsnorm)[name = string("logits_6")]; + tensor expand_dims_8 = const()[name = string("expand_dims_8"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793738048)))]; + tensor logits_7_strides_0 = const()[name = string("logits_7_strides_0"), val = tensor([1])]; + string logits_7_pad_type_0 = const()[name = string("logits_7_pad_type_0"), val = string("valid")]; + tensor logits_7_pad_0 = const()[name = string("logits_7_pad_0"), val = tensor([0, 0])]; + tensor logits_7_dilations_0 = const()[name = string("logits_7_dilations_0"), val = tensor([1])]; + int32 logits_7_groups_0 = const()[name = string("logits_7_groups_0"), val = int32(1)]; + tensor logits_7 = conv(dilations = logits_7_dilations_0, groups = logits_7_groups_0, pad = logits_7_pad_0, pad_type = logits_7_pad_type_0, strides = logits_7_strides_0, weight = expand_dims_8, x = final_norm_rmsnorm)[name = string("logits_7")]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(823098240)))]; + tensor logits_8_strides_0 = const()[name = string("logits_8_strides_0"), val = tensor([1])]; + string logits_8_pad_type_0 = const()[name = string("logits_8_pad_type_0"), val = string("valid")]; + tensor logits_8_pad_0 = const()[name = string("logits_8_pad_0"), val = tensor([0, 0])]; + tensor logits_8_dilations_0 = const()[name = string("logits_8_dilations_0"), val = tensor([1])]; + int32 logits_8_groups_0 = const()[name = string("logits_8_groups_0"), val = int32(1)]; + tensor logits_8 = conv(dilations = logits_8_dilations_0, groups = logits_8_groups_0, pad = logits_8_pad_0, pad_type = logits_8_pad_type_0, strides = logits_8_strides_0, weight = expand_dims_9, x = final_norm_rmsnorm)[name = string("logits_8")]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852458432)))]; + tensor logits_9_strides_0 = const()[name = string("logits_9_strides_0"), val = tensor([1])]; + string logits_9_pad_type_0 = const()[name = string("logits_9_pad_type_0"), val = string("valid")]; + tensor logits_9_pad_0 = const()[name = string("logits_9_pad_0"), val = tensor([0, 0])]; + tensor logits_9_dilations_0 = const()[name = string("logits_9_dilations_0"), val = tensor([1])]; + int32 logits_9_groups_0 = const()[name = string("logits_9_groups_0"), val = int32(1)]; + tensor logits_9 = conv(dilations = logits_9_dilations_0, groups = logits_9_groups_0, pad = logits_9_pad_0, pad_type = logits_9_pad_type_0, strides = logits_9_strides_0, weight = expand_dims_10, x = final_norm_rmsnorm)[name = string("logits_9")]; + int32 _logits_axis_0 = const()[name = string("_logits_axis_0"), val = int32(1)]; + bool _logits_interleave_0 = const()[name = string("_logits_interleave_0"), val = bool(false)]; + tensor _logits = concat(axis = _logits_axis_0, interleave = _logits_interleave_0, values = (logits_0, logits_1, logits_2, logits_3, logits_4, logits_5, logits_6, logits_7, logits_8, logits_9))[name = string("_logits")]; + string logits_dtype_0 = const()[name = string("logits_dtype_0"), val = string("fp32")]; + tensor logits = cast(dtype = logits_dtype_0, x = _logits)[name = string("cast_0")]; + } -> (logits); + func length_64(tensor input_ids, state> key_cache_state, tensor query_pos1, state> value_cache_state) { + tensor expand_dims_0_axes_0 = const()[name = string("expand_dims_0_axes_0"), val = tensor([-1, -2])]; + tensor expand_dims_0 = expand_dims(axes = expand_dims_0_axes_0, x = query_pos1)[name = string("expand_dims_0")]; + tensor add_0_x_0 = const()[name = string("add_0_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860486656)))]; + tensor add_0 = add(x = add_0_x_0, y = expand_dims_0)[name = string("add_0")]; + tensor mask_gather_x_0 = const()[name = string("mask_gather_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; + int32 mask_gather_axis_0 = const()[name = string("mask_gather_axis_0"), val = int32(0)]; + int32 mask_gather_batch_dims_0 = const()[name = string("mask_gather_batch_dims_0"), val = int32(0)]; + bool mask_gather_validate_indices_0 = const()[name = string("mask_gather_validate_indices_0"), val = bool(false)]; + tensor mask_gather = gather(axis = mask_gather_axis_0, batch_dims = mask_gather_batch_dims_0, indices = add_0, validate_indices = mask_gather_validate_indices_0, x = mask_gather_x_0)[name = string("mask_gather")]; + tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor query_sin_emb_x_0 = const()[name = string("query_sin_emb_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524416)))]; + int32 query_sin_emb_axis_0 = const()[name = string("query_sin_emb_axis_0"), val = int32(0)]; + int32 query_sin_emb_batch_dims_0 = const()[name = string("query_sin_emb_batch_dims_0"), val = int32(0)]; + bool query_sin_emb_validate_indices_0 = const()[name = string("query_sin_emb_validate_indices_0"), val = bool(false)]; + tensor query_sin_emb = gather(axis = query_sin_emb_axis_0, batch_dims = query_sin_emb_batch_dims_0, indices = add_0, validate_indices = query_sin_emb_validate_indices_0, x = query_sin_emb_x_0)[name = string("query_sin_emb")]; + tensor query_cos_emb_x_0 = const()[name = string("query_cos_emb_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(590016)))]; + int32 query_cos_emb_axis_0 = const()[name = string("query_cos_emb_axis_0"), val = int32(0)]; + int32 query_cos_emb_batch_dims_0 = const()[name = string("query_cos_emb_batch_dims_0"), val = int32(0)]; + bool query_cos_emb_validate_indices_0 = const()[name = string("query_cos_emb_validate_indices_0"), val = bool(false)]; + tensor query_cos_emb = gather(axis = query_cos_emb_axis_0, batch_dims = query_cos_emb_batch_dims_0, indices = add_0, validate_indices = query_cos_emb_validate_indices_0, x = query_cos_emb_x_0)[name = string("query_cos_emb")]; + tensor token_embedding_x_0 = const()[name = string("token_embedding_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(655616)))]; + int32 token_embedding_axis_0 = const()[name = string("token_embedding_axis_0"), val = int32(0)]; + int32 token_embedding_batch_dims_0 = const()[name = string("token_embedding_batch_dims_0"), val = int32(0)]; + bool token_embedding_validate_indices_0 = const()[name = string("token_embedding_validate_indices_0"), val = bool(false)]; + tensor token_embedding = gather(axis = token_embedding_axis_0, batch_dims = token_embedding_batch_dims_0, indices = input_ids, validate_indices = token_embedding_validate_indices_0, x = token_embedding_x_0)[name = string("token_embedding")]; + tensor input_embeddings_channels_first_perm_0 = const()[name = string("input_embeddings_channels_first_perm_0"), val = tensor([0, 2, 1])]; + int32 end_pos_0_x_0 = const()[name = string("end_pos_0_x_0"), val = int32(64)]; + tensor end_pos_0 = add(x = end_pos_0_x_0, y = query_pos1)[name = string("end_pos_0")]; + tensor read_state_0 = read_state(input = key_cache_state)[name = string("read_state_0")]; + tensor read_state_1 = read_state(input = value_cache_state)[name = string("read_state_1")]; + tensor input_embeddings_channels_first = transpose(perm = input_embeddings_channels_first_perm_0, x = token_embedding)[name = string("transpose_49")]; + tensor block_0_attention_rmsnorm_abs = abs(x = input_embeddings_channels_first)[name = string("block_0_attention_rmsnorm_abs")]; + tensor block_0_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_0_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_0_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_0_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_0_attention_rmsnorm_maxval = reduce_max(axes = block_0_attention_rmsnorm_maxval_axes_0, keep_dims = block_0_attention_rmsnorm_maxval_keep_dims_0, x = block_0_attention_rmsnorm_abs)[name = string("block_0_attention_rmsnorm_maxval")]; + fp16 block_0_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_0_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_0_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_0_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_0_attention_rmsnorm_maxval_clipped = clip(alpha = block_0_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_0_attention_rmsnorm_maxval_clipped_beta_0, x = block_0_attention_rmsnorm_maxval)[name = string("block_0_attention_rmsnorm_maxval_clipped")]; + tensor block_0_attention_rmsnorm_scaled = real_div(x = input_embeddings_channels_first, y = block_0_attention_rmsnorm_maxval_clipped)[name = string("block_0_attention_rmsnorm_scaled")]; + tensor block_0_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_0_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_0_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_0_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_0_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_0_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_0_attention_rmsnorm_squared_sum_keep_dims_0, x = block_0_attention_rmsnorm_scaled)[name = string("block_0_attention_rmsnorm_squared_sum")]; + fp16 block_0_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_0_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_0_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_0_attention_rmsnorm_rsqrt_epsilon_0, x = block_0_attention_rmsnorm_squared_sum)[name = string("block_0_attention_rmsnorm_rsqrt")]; + fp16 block_0_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_0_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_0_attention_rmsnorm_dim_scaled = mul(x = block_0_attention_rmsnorm_scaled, y = block_0_attention_rmsnorm_dim_scaled_y_0)[name = string("block_0_attention_rmsnorm_dim_scaled")]; + tensor block_0_attention_rmsnorm_normalized = mul(x = block_0_attention_rmsnorm_dim_scaled, y = block_0_attention_rmsnorm_rsqrt)[name = string("block_0_attention_rmsnorm_normalized")]; + tensor block_0_attention_rmsnorm_y_0 = const()[name = string("block_0_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272924992)))]; + tensor block_0_attention_rmsnorm = mul(x = block_0_attention_rmsnorm_normalized, y = block_0_attention_rmsnorm_y_0)[name = string("block_0_attention_rmsnorm")]; + tensor attention_0_qkvproj_weight_0 = const()[name = string("attention_0_qkvproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272926848)))]; + tensor attention_0_qkvproj_bias_0 = const()[name = string("attention_0_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274991296)))]; + tensor attention_0_qkvproj_strides_0 = const()[name = string("attention_0_qkvproj_strides_0"), val = tensor([1])]; + string attention_0_qkvproj_pad_type_0 = const()[name = string("attention_0_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_0_qkvproj_pad_0 = const()[name = string("attention_0_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_0_qkvproj_dilations_0 = const()[name = string("attention_0_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_0_qkvproj_groups_0 = const()[name = string("attention_0_qkvproj_groups_0"), val = int32(1)]; + tensor attention_0_qkvproj = conv(bias = attention_0_qkvproj_bias_0, dilations = attention_0_qkvproj_dilations_0, groups = attention_0_qkvproj_groups_0, pad = attention_0_qkvproj_pad_0, pad_type = attention_0_qkvproj_pad_type_0, strides = attention_0_qkvproj_strides_0, weight = attention_0_qkvproj_weight_0, x = block_0_attention_rmsnorm)[name = string("attention_0_qkvproj")]; + tensor attention_0_head_reshape_shape_0 = const()[name = string("attention_0_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_0_head_reshape = reshape(shape = attention_0_head_reshape_shape_0, x = attention_0_qkvproj)[name = string("attention_0_head_reshape")]; + tensor attention_0_head_transpose_perm_0 = const()[name = string("attention_0_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_0_split_qkv_heads_axis_0 = const()[name = string("attention_0_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_0_split_qkv_heads_split_sizes_0 = const()[name = string("attention_0_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_0_head_transpose = transpose(perm = attention_0_head_transpose_perm_0, x = attention_0_head_reshape)[name = string("transpose_48")]; + tensor attention_0_split_qkv_heads_0, tensor attention_0_split_qkv_heads_1, tensor attention_0_split_qkv_heads_2 = split(axis = attention_0_split_qkv_heads_axis_0, split_sizes = attention_0_split_qkv_heads_split_sizes_0, x = attention_0_head_transpose)[name = string("attention_0_split_qkv_heads")]; + tensor attention_0_q_rope_lhs_mult = mul(x = attention_0_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_0_q_rope_lhs_mult")]; + int32 attention_0_q_rotate_half_split_num_splits_0 = const()[name = string("attention_0_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_0_q_rotate_half_split_axis_0 = const()[name = string("attention_0_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_0_q_rotate_half_split_0, tensor attention_0_q_rotate_half_split_1 = split(axis = attention_0_q_rotate_half_split_axis_0, num_splits = attention_0_q_rotate_half_split_num_splits_0, x = attention_0_split_qkv_heads_0)[name = string("attention_0_q_rotate_half_split")]; + fp16 attention_0_q_rotate_half_neg_y_0 = const()[name = string("attention_0_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_0_q_rotate_half_neg = mul(x = attention_0_q_rotate_half_split_1, y = attention_0_q_rotate_half_neg_y_0)[name = string("attention_0_q_rotate_half_neg")]; + int32 attention_0_q_rotate_half_concat_axis_0 = const()[name = string("attention_0_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_0_q_rotate_half_concat_interleave_0 = const()[name = string("attention_0_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_0_q_rotate_half_concat = concat(axis = attention_0_q_rotate_half_concat_axis_0, interleave = attention_0_q_rotate_half_concat_interleave_0, values = (attention_0_q_rotate_half_neg, attention_0_q_rotate_half_split_0))[name = string("attention_0_q_rotate_half_concat")]; + tensor attention_0_q_rope_rhs_mult = mul(x = attention_0_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_0_q_rope_rhs_mult")]; + tensor attention_0_q_rope = add(x = attention_0_q_rope_lhs_mult, y = attention_0_q_rope_rhs_mult)[name = string("attention_0_q_rope")]; + tensor attention_0_k_rope_lhs_mult = mul(x = attention_0_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_0_k_rope_lhs_mult")]; + int32 attention_0_k_rotate_half_split_num_splits_0 = const()[name = string("attention_0_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_0_k_rotate_half_split_axis_0 = const()[name = string("attention_0_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_0_k_rotate_half_split_0, tensor attention_0_k_rotate_half_split_1 = split(axis = attention_0_k_rotate_half_split_axis_0, num_splits = attention_0_k_rotate_half_split_num_splits_0, x = attention_0_split_qkv_heads_1)[name = string("attention_0_k_rotate_half_split")]; + fp16 attention_0_k_rotate_half_neg_y_0 = const()[name = string("attention_0_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_0_k_rotate_half_neg = mul(x = attention_0_k_rotate_half_split_1, y = attention_0_k_rotate_half_neg_y_0)[name = string("attention_0_k_rotate_half_neg")]; + int32 attention_0_k_rotate_half_concat_axis_0 = const()[name = string("attention_0_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_0_k_rotate_half_concat_interleave_0 = const()[name = string("attention_0_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_0_k_rotate_half_concat = concat(axis = attention_0_k_rotate_half_concat_axis_0, interleave = attention_0_k_rotate_half_concat_interleave_0, values = (attention_0_k_rotate_half_neg, attention_0_k_rotate_half_split_0))[name = string("attention_0_k_rotate_half_concat")]; + tensor attention_0_k_rope_rhs_mult = mul(x = attention_0_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_0_k_rope_rhs_mult")]; + tensor attention_0_k_rope = add(x = attention_0_k_rope_lhs_mult, y = attention_0_k_rope_rhs_mult)[name = string("attention_0_k_rope")]; + int32 attention_0_q_splits_axis_0 = const()[name = string("attention_0_q_splits_axis_0"), val = int32(1)]; + int32 attention_0_q_splits_num_splits_0 = const()[name = string("attention_0_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_0_q_splits_0, tensor attention_0_q_splits_1 = split(axis = attention_0_q_splits_axis_0, num_splits = attention_0_q_splits_num_splits_0, x = attention_0_q_rope)[name = string("attention_0_q_splits")]; + tensor attention_0_update_begin_0_values0_0 = const()[name = string("attention_0_update_begin_0_values0_0"), val = tensor([0])]; + tensor attention_0_update_begin_0_values1_0 = const()[name = string("attention_0_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_0_update_begin_0_values3_0 = const()[name = string("attention_0_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_0_update_begin_0_axis_0 = const()[name = string("attention_0_update_begin_0_axis_0"), val = int32(0)]; + bool attention_0_update_begin_0_interleave_0 = const()[name = string("attention_0_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_0_update_begin_0 = concat(axis = attention_0_update_begin_0_axis_0, interleave = attention_0_update_begin_0_interleave_0, values = (attention_0_update_begin_0_values0_0, attention_0_update_begin_0_values1_0, query_pos1, attention_0_update_begin_0_values3_0))[name = string("attention_0_update_begin_0")]; + tensor attention_0_update_end_0_values0_0 = const()[name = string("attention_0_update_end_0_values0_0"), val = tensor([1])]; + tensor attention_0_update_end_0_values1_0 = const()[name = string("attention_0_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_0_update_end_0_values3_0 = const()[name = string("attention_0_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_0_update_end_0_axis_0 = const()[name = string("attention_0_update_end_0_axis_0"), val = int32(0)]; + bool attention_0_update_end_0_interleave_0 = const()[name = string("attention_0_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_0_update_end_0 = concat(axis = attention_0_update_end_0_axis_0, interleave = attention_0_update_end_0_interleave_0, values = (attention_0_update_end_0_values0_0, attention_0_update_end_0_values1_0, end_pos_0, attention_0_update_end_0_values3_0))[name = string("attention_0_update_end_0")]; + tensor attention_0_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_0_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_0_updated_key_cache_0 = slice_update(begin = attention_0_update_begin_0, end = attention_0_update_end_0, squeeze_mask = attention_0_updated_key_cache_0_squeeze_mask_0, update = attention_0_k_rope, x = read_state_0)[name = string("attention_0_updated_key_cache_0")]; + write_state(data = attention_0_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_48_write_state")]; + tensor coreml_update_state_0 = read_state(input = key_cache_state)[name = string("coreml_update_state_48")]; + tensor attention_0_key_cache_begin_0 = const()[name = string("attention_0_key_cache_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor attention_0_key_cache_end_0 = const()[name = string("attention_0_key_cache_end_0"), val = tensor([1, 2, 512, 64])]; + tensor attention_0_key_cache_squeeze_mask_0 = const()[name = string("attention_0_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_0_key_cache = slice_by_index(begin = attention_0_key_cache_begin_0, end = attention_0_key_cache_end_0, squeeze_mask = attention_0_key_cache_squeeze_mask_0, x = coreml_update_state_0)[name = string("attention_0_key_cache")]; + int32 attention_0_key_cache_head_axis_0 = const()[name = string("attention_0_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_0_key_cache_head_num_splits_0 = const()[name = string("attention_0_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_0_key_cache_head_0, tensor attention_0_key_cache_head_1 = split(axis = attention_0_key_cache_head_axis_0, num_splits = attention_0_key_cache_head_num_splits_0, x = attention_0_key_cache)[name = string("attention_0_key_cache_head")]; + tensor attention_0_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_0_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_0_updated_value_cache_0 = slice_update(begin = attention_0_update_begin_0, end = attention_0_update_end_0, squeeze_mask = attention_0_updated_value_cache_0_squeeze_mask_0, update = attention_0_split_qkv_heads_2, x = read_state_1)[name = string("attention_0_updated_value_cache_0")]; + write_state(data = attention_0_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_49_write_state")]; + tensor coreml_update_state_1 = read_state(input = value_cache_state)[name = string("coreml_update_state_49")]; + tensor attention_0_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_0_slice_current_layer_value_cache_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor attention_0_slice_current_layer_value_cache_end_0 = const()[name = string("attention_0_slice_current_layer_value_cache_end_0"), val = tensor([1, 2, 512, 64])]; + tensor attention_0_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_0_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_0_slice_current_layer_value_cache = slice_by_index(begin = attention_0_slice_current_layer_value_cache_begin_0, end = attention_0_slice_current_layer_value_cache_end_0, squeeze_mask = attention_0_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_1)[name = string("attention_0_slice_current_layer_value_cache")]; + int32 attention_0_slice_value_cache_heads_axis_0 = const()[name = string("attention_0_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_0_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_0_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_0_slice_value_cache_heads_0, tensor attention_0_slice_value_cache_heads_1 = split(axis = attention_0_slice_value_cache_heads_axis_0, num_splits = attention_0_slice_value_cache_heads_num_splits_0, x = attention_0_slice_current_layer_value_cache)[name = string("attention_0_slice_value_cache_heads")]; + bool attention_0_scores_0_transpose_y_0 = const()[name = string("attention_0_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_0_scores_0_transpose_x_0 = const()[name = string("attention_0_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_0_scores_0 = matmul(transpose_x = attention_0_scores_0_transpose_x_0, transpose_y = attention_0_scores_0_transpose_y_0, x = attention_0_key_cache_head_0, y = attention_0_q_splits_0)[name = string("attention_0_scores_0")]; + fp16 attention_0_scaled_scores_0_y_0 = const()[name = string("attention_0_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_0_scaled_scores_0 = mul(x = attention_0_scores_0, y = attention_0_scaled_scores_0_y_0)[name = string("attention_0_scaled_scores_0")]; + tensor transpose_0 = transpose(perm = transpose_0_perm_0, x = mask_gather)[name = string("transpose_50")]; + tensor attention_0_masked_scaled_scores_0 = add(x = attention_0_scaled_scores_0, y = transpose_0)[name = string("attention_0_masked_scaled_scores_0")]; + int32 softmax_0_axis_0 = const()[name = string("softmax_0_axis_0"), val = int32(-2)]; + tensor softmax_0 = softmax(axis = softmax_0_axis_0, x = attention_0_masked_scaled_scores_0)[name = string("softmax_0")]; + bool attention_0_attention_0_transpose_x_0 = const()[name = string("attention_0_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_0_attention_0_transpose_y_0 = const()[name = string("attention_0_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_0_attention_0 = matmul(transpose_x = attention_0_attention_0_transpose_x_0, transpose_y = attention_0_attention_0_transpose_y_0, x = softmax_0, y = attention_0_slice_value_cache_heads_0)[name = string("attention_0_attention_0")]; + bool attention_0_scores_1_transpose_y_0 = const()[name = string("attention_0_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_0_scores_1_transpose_x_0 = const()[name = string("attention_0_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_0_scores_1 = matmul(transpose_x = attention_0_scores_1_transpose_x_0, transpose_y = attention_0_scores_1_transpose_y_0, x = attention_0_key_cache_head_1, y = attention_0_q_splits_1)[name = string("attention_0_scores_1")]; + fp16 attention_0_scaled_scores_1_y_0 = const()[name = string("attention_0_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_0_scaled_scores_1 = mul(x = attention_0_scores_1, y = attention_0_scaled_scores_1_y_0)[name = string("attention_0_scaled_scores_1")]; + tensor attention_0_masked_scaled_scores_1 = add(x = attention_0_scaled_scores_1, y = transpose_0)[name = string("attention_0_masked_scaled_scores_1")]; + int32 softmax_1_axis_0 = const()[name = string("softmax_1_axis_0"), val = int32(-2)]; + tensor softmax_1 = softmax(axis = softmax_1_axis_0, x = attention_0_masked_scaled_scores_1)[name = string("softmax_1")]; + bool attention_0_attention_1_transpose_x_0 = const()[name = string("attention_0_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_0_attention_1_transpose_y_0 = const()[name = string("attention_0_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_0_attention_1 = matmul(transpose_x = attention_0_attention_1_transpose_x_0, transpose_y = attention_0_attention_1_transpose_y_0, x = softmax_1, y = attention_0_slice_value_cache_heads_1)[name = string("attention_0_attention_1")]; + int32 attention_0_concat_attention_all_heads_axis_0 = const()[name = string("attention_0_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_0_concat_attention_all_heads_interleave_0 = const()[name = string("attention_0_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_0_concat_attention_all_heads = concat(axis = attention_0_concat_attention_all_heads_axis_0, interleave = attention_0_concat_attention_all_heads_interleave_0, values = (attention_0_attention_0, attention_0_attention_1))[name = string("attention_0_concat_attention_all_heads")]; + tensor attention_0_channels_first_retransposed_perm_0 = const()[name = string("attention_0_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_0_reshaped_shape_0 = const()[name = string("attention_0_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_0_channels_first_retransposed = transpose(perm = attention_0_channels_first_retransposed_perm_0, x = attention_0_concat_attention_all_heads)[name = string("transpose_47")]; + tensor attention_0_reshaped = reshape(shape = attention_0_reshaped_shape_0, x = attention_0_channels_first_retransposed)[name = string("attention_0_reshaped")]; + tensor attention_0_outproj_weight_0 = const()[name = string("attention_0_outproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274993664)))]; + tensor attention_0_outproj_strides_0 = const()[name = string("attention_0_outproj_strides_0"), val = tensor([1])]; + string attention_0_outproj_pad_type_0 = const()[name = string("attention_0_outproj_pad_type_0"), val = string("valid")]; + tensor attention_0_outproj_pad_0 = const()[name = string("attention_0_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_0_outproj_dilations_0 = const()[name = string("attention_0_outproj_dilations_0"), val = tensor([1])]; + int32 attention_0_outproj_groups_0 = const()[name = string("attention_0_outproj_groups_0"), val = int32(1)]; + tensor attention_0_outproj = conv(dilations = attention_0_outproj_dilations_0, groups = attention_0_outproj_groups_0, pad = attention_0_outproj_pad_0, pad_type = attention_0_outproj_pad_type_0, strides = attention_0_outproj_strides_0, weight = attention_0_outproj_weight_0, x = attention_0_reshaped)[name = string("attention_0_outproj")]; + tensor block_0_residual_1 = add(x = input_embeddings_channels_first, y = attention_0_outproj)[name = string("block_0_residual_1")]; + tensor block_0_ffn_rmsnorm_abs = abs(x = block_0_residual_1)[name = string("block_0_ffn_rmsnorm_abs")]; + tensor block_0_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_0_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_0_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_0_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_0_ffn_rmsnorm_maxval = reduce_max(axes = block_0_ffn_rmsnorm_maxval_axes_0, keep_dims = block_0_ffn_rmsnorm_maxval_keep_dims_0, x = block_0_ffn_rmsnorm_abs)[name = string("block_0_ffn_rmsnorm_maxval")]; + fp16 block_0_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_0_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_0_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_0_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_0_ffn_rmsnorm_maxval_clipped = clip(alpha = block_0_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_0_ffn_rmsnorm_maxval_clipped_beta_0, x = block_0_ffn_rmsnorm_maxval)[name = string("block_0_ffn_rmsnorm_maxval_clipped")]; + tensor block_0_ffn_rmsnorm_scaled = real_div(x = block_0_residual_1, y = block_0_ffn_rmsnorm_maxval_clipped)[name = string("block_0_ffn_rmsnorm_scaled")]; + tensor block_0_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_0_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_0_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_0_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_0_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_0_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_0_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_0_ffn_rmsnorm_scaled)[name = string("block_0_ffn_rmsnorm_squared_sum")]; + fp16 block_0_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_0_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_0_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_0_ffn_rmsnorm_rsqrt_epsilon_0, x = block_0_ffn_rmsnorm_squared_sum)[name = string("block_0_ffn_rmsnorm_rsqrt")]; + fp16 block_0_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_0_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_0_ffn_rmsnorm_dim_scaled = mul(x = block_0_ffn_rmsnorm_scaled, y = block_0_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_0_ffn_rmsnorm_dim_scaled")]; + tensor block_0_ffn_rmsnorm_normalized = mul(x = block_0_ffn_rmsnorm_dim_scaled, y = block_0_ffn_rmsnorm_rsqrt)[name = string("block_0_ffn_rmsnorm_normalized")]; + tensor block_0_ffn_rmsnorm_y_0 = const()[name = string("block_0_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276599360)))]; + tensor block_0_ffn_rmsnorm = mul(x = block_0_ffn_rmsnorm_normalized, y = block_0_ffn_rmsnorm_y_0)[name = string("block_0_ffn_rmsnorm")]; + tensor block_0_ffn_inproj_weight_0 = const()[name = string("block_0_ffn_inproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276601216)))]; + tensor block_0_ffn_inproj_strides_0 = const()[name = string("block_0_ffn_inproj_strides_0"), val = tensor([1])]; + string block_0_ffn_inproj_pad_type_0 = const()[name = string("block_0_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_0_ffn_inproj_pad_0 = const()[name = string("block_0_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_0_ffn_inproj_dilations_0 = const()[name = string("block_0_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_0_ffn_inproj_groups_0 = const()[name = string("block_0_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_0_ffn_inproj = conv(dilations = block_0_ffn_inproj_dilations_0, groups = block_0_ffn_inproj_groups_0, pad = block_0_ffn_inproj_pad_0, pad_type = block_0_ffn_inproj_pad_type_0, strides = block_0_ffn_inproj_strides_0, weight = block_0_ffn_inproj_weight_0, x = block_0_ffn_rmsnorm)[name = string("block_0_ffn_inproj")]; + tensor block_0_ffn_g_weight_0 = const()[name = string("block_0_ffn_g_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285317568)))]; + tensor block_0_ffn_g_strides_0 = const()[name = string("block_0_ffn_g_strides_0"), val = tensor([1])]; + string block_0_ffn_g_pad_type_0 = const()[name = string("block_0_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_0_ffn_g_pad_0 = const()[name = string("block_0_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_0_ffn_g_dilations_0 = const()[name = string("block_0_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_0_ffn_g_groups_0 = const()[name = string("block_0_ffn_g_groups_0"), val = int32(1)]; + tensor block_0_ffn_g = conv(dilations = block_0_ffn_g_dilations_0, groups = block_0_ffn_g_groups_0, pad = block_0_ffn_g_pad_0, pad_type = block_0_ffn_g_pad_type_0, strides = block_0_ffn_g_strides_0, weight = block_0_ffn_g_weight_0, x = block_0_ffn_rmsnorm)[name = string("block_0_ffn_g")]; + tensor block_0_ffn_g_activation = silu(x = block_0_ffn_g)[name = string("block_0_ffn_g_activation")]; + tensor block_0_ffn_x_gated = mul(x = block_0_ffn_inproj, y = block_0_ffn_g_activation)[name = string("block_0_ffn_x_gated")]; + tensor block_0_ffn_outproj_weight_0 = const()[name = string("block_0_ffn_outproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294033920)))]; + tensor block_0_ffn_outproj_strides_0 = const()[name = string("block_0_ffn_outproj_strides_0"), val = tensor([1])]; + string block_0_ffn_outproj_pad_type_0 = const()[name = string("block_0_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_0_ffn_outproj_pad_0 = const()[name = string("block_0_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_0_ffn_outproj_dilations_0 = const()[name = string("block_0_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_0_ffn_outproj_groups_0 = const()[name = string("block_0_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_0_ffn_outproj = conv(dilations = block_0_ffn_outproj_dilations_0, groups = block_0_ffn_outproj_groups_0, pad = block_0_ffn_outproj_pad_0, pad_type = block_0_ffn_outproj_pad_type_0, strides = block_0_ffn_outproj_strides_0, weight = block_0_ffn_outproj_weight_0, x = block_0_ffn_x_gated)[name = string("block_0_ffn_outproj")]; + tensor block_0_residual_2 = add(x = block_0_ffn_outproj, y = block_0_residual_1)[name = string("block_0_residual_2")]; + tensor block_1_attention_rmsnorm_abs = abs(x = block_0_residual_2)[name = string("block_1_attention_rmsnorm_abs")]; + tensor block_1_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_1_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_1_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_1_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_1_attention_rmsnorm_maxval = reduce_max(axes = block_1_attention_rmsnorm_maxval_axes_0, keep_dims = block_1_attention_rmsnorm_maxval_keep_dims_0, x = block_1_attention_rmsnorm_abs)[name = string("block_1_attention_rmsnorm_maxval")]; + fp16 block_1_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_1_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_1_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_1_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_1_attention_rmsnorm_maxval_clipped = clip(alpha = block_1_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_1_attention_rmsnorm_maxval_clipped_beta_0, x = block_1_attention_rmsnorm_maxval)[name = string("block_1_attention_rmsnorm_maxval_clipped")]; + tensor block_1_attention_rmsnorm_scaled = real_div(x = block_0_residual_2, y = block_1_attention_rmsnorm_maxval_clipped)[name = string("block_1_attention_rmsnorm_scaled")]; + tensor block_1_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_1_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_1_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_1_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_1_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_1_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_1_attention_rmsnorm_squared_sum_keep_dims_0, x = block_1_attention_rmsnorm_scaled)[name = string("block_1_attention_rmsnorm_squared_sum")]; + fp16 block_1_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_1_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_1_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_1_attention_rmsnorm_rsqrt_epsilon_0, x = block_1_attention_rmsnorm_squared_sum)[name = string("block_1_attention_rmsnorm_rsqrt")]; + fp16 block_1_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_1_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_1_attention_rmsnorm_dim_scaled = mul(x = block_1_attention_rmsnorm_scaled, y = block_1_attention_rmsnorm_dim_scaled_y_0)[name = string("block_1_attention_rmsnorm_dim_scaled")]; + tensor block_1_attention_rmsnorm_normalized = mul(x = block_1_attention_rmsnorm_dim_scaled, y = block_1_attention_rmsnorm_rsqrt)[name = string("block_1_attention_rmsnorm_normalized")]; + tensor block_1_attention_rmsnorm_y_0 = const()[name = string("block_1_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302750272)))]; + tensor block_1_attention_rmsnorm = mul(x = block_1_attention_rmsnorm_normalized, y = block_1_attention_rmsnorm_y_0)[name = string("block_1_attention_rmsnorm")]; + tensor attention_1_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302752128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303526336))))[name = string("attention_1_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_0 = constexpr_blockwise_shift_scale(data = attention_1_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303563264))))[name = string("constexpr_blockwise_shift_scale_0")]; + tensor attention_1_qkvproj_bias_0 = const()[name = string("attention_1_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303565632)))]; + tensor attention_1_qkvproj_strides_0 = const()[name = string("attention_1_qkvproj_strides_0"), val = tensor([1])]; + string attention_1_qkvproj_pad_type_0 = const()[name = string("attention_1_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_1_qkvproj_pad_0 = const()[name = string("attention_1_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_1_qkvproj_dilations_0 = const()[name = string("attention_1_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_1_qkvproj_groups_0 = const()[name = string("attention_1_qkvproj_groups_0"), val = int32(1)]; + tensor attention_1_qkvproj = conv(bias = attention_1_qkvproj_bias_0, dilations = attention_1_qkvproj_dilations_0, groups = attention_1_qkvproj_groups_0, pad = attention_1_qkvproj_pad_0, pad_type = attention_1_qkvproj_pad_type_0, strides = attention_1_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_0, x = block_1_attention_rmsnorm)[name = string("attention_1_qkvproj")]; + tensor attention_1_head_reshape_shape_0 = const()[name = string("attention_1_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_1_head_reshape = reshape(shape = attention_1_head_reshape_shape_0, x = attention_1_qkvproj)[name = string("attention_1_head_reshape")]; + tensor attention_1_head_transpose_perm_0 = const()[name = string("attention_1_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_1_split_qkv_heads_axis_0 = const()[name = string("attention_1_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_1_split_qkv_heads_split_sizes_0 = const()[name = string("attention_1_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_1_head_transpose = transpose(perm = attention_1_head_transpose_perm_0, x = attention_1_head_reshape)[name = string("transpose_46")]; + tensor attention_1_split_qkv_heads_0, tensor attention_1_split_qkv_heads_1, tensor attention_1_split_qkv_heads_2 = split(axis = attention_1_split_qkv_heads_axis_0, split_sizes = attention_1_split_qkv_heads_split_sizes_0, x = attention_1_head_transpose)[name = string("attention_1_split_qkv_heads")]; + tensor attention_1_q_rope_lhs_mult = mul(x = attention_1_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_1_q_rope_lhs_mult")]; + int32 attention_1_q_rotate_half_split_num_splits_0 = const()[name = string("attention_1_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_1_q_rotate_half_split_axis_0 = const()[name = string("attention_1_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_1_q_rotate_half_split_0, tensor attention_1_q_rotate_half_split_1 = split(axis = attention_1_q_rotate_half_split_axis_0, num_splits = attention_1_q_rotate_half_split_num_splits_0, x = attention_1_split_qkv_heads_0)[name = string("attention_1_q_rotate_half_split")]; + fp16 attention_1_q_rotate_half_neg_y_0 = const()[name = string("attention_1_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_1_q_rotate_half_neg = mul(x = attention_1_q_rotate_half_split_1, y = attention_1_q_rotate_half_neg_y_0)[name = string("attention_1_q_rotate_half_neg")]; + int32 attention_1_q_rotate_half_concat_axis_0 = const()[name = string("attention_1_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_1_q_rotate_half_concat_interleave_0 = const()[name = string("attention_1_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_1_q_rotate_half_concat = concat(axis = attention_1_q_rotate_half_concat_axis_0, interleave = attention_1_q_rotate_half_concat_interleave_0, values = (attention_1_q_rotate_half_neg, attention_1_q_rotate_half_split_0))[name = string("attention_1_q_rotate_half_concat")]; + tensor attention_1_q_rope_rhs_mult = mul(x = attention_1_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_1_q_rope_rhs_mult")]; + tensor attention_1_q_rope = add(x = attention_1_q_rope_lhs_mult, y = attention_1_q_rope_rhs_mult)[name = string("attention_1_q_rope")]; + tensor attention_1_k_rope_lhs_mult = mul(x = attention_1_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_1_k_rope_lhs_mult")]; + int32 attention_1_k_rotate_half_split_num_splits_0 = const()[name = string("attention_1_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_1_k_rotate_half_split_axis_0 = const()[name = string("attention_1_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_1_k_rotate_half_split_0, tensor attention_1_k_rotate_half_split_1 = split(axis = attention_1_k_rotate_half_split_axis_0, num_splits = attention_1_k_rotate_half_split_num_splits_0, x = attention_1_split_qkv_heads_1)[name = string("attention_1_k_rotate_half_split")]; + fp16 attention_1_k_rotate_half_neg_y_0 = const()[name = string("attention_1_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_1_k_rotate_half_neg = mul(x = attention_1_k_rotate_half_split_1, y = attention_1_k_rotate_half_neg_y_0)[name = string("attention_1_k_rotate_half_neg")]; + int32 attention_1_k_rotate_half_concat_axis_0 = const()[name = string("attention_1_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_1_k_rotate_half_concat_interleave_0 = const()[name = string("attention_1_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_1_k_rotate_half_concat = concat(axis = attention_1_k_rotate_half_concat_axis_0, interleave = attention_1_k_rotate_half_concat_interleave_0, values = (attention_1_k_rotate_half_neg, attention_1_k_rotate_half_split_0))[name = string("attention_1_k_rotate_half_concat")]; + tensor attention_1_k_rope_rhs_mult = mul(x = attention_1_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_1_k_rope_rhs_mult")]; + tensor attention_1_k_rope = add(x = attention_1_k_rope_lhs_mult, y = attention_1_k_rope_rhs_mult)[name = string("attention_1_k_rope")]; + int32 attention_1_q_splits_axis_0 = const()[name = string("attention_1_q_splits_axis_0"), val = int32(1)]; + int32 attention_1_q_splits_num_splits_0 = const()[name = string("attention_1_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_1_q_splits_0, tensor attention_1_q_splits_1 = split(axis = attention_1_q_splits_axis_0, num_splits = attention_1_q_splits_num_splits_0, x = attention_1_q_rope)[name = string("attention_1_q_splits")]; + tensor attention_1_update_begin_0_values0_0 = const()[name = string("attention_1_update_begin_0_values0_0"), val = tensor([1])]; + tensor attention_1_update_begin_0_values1_0 = const()[name = string("attention_1_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_1_update_begin_0_values3_0 = const()[name = string("attention_1_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_1_update_begin_0_axis_0 = const()[name = string("attention_1_update_begin_0_axis_0"), val = int32(0)]; + bool attention_1_update_begin_0_interleave_0 = const()[name = string("attention_1_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_1_update_begin_0 = concat(axis = attention_1_update_begin_0_axis_0, interleave = attention_1_update_begin_0_interleave_0, values = (attention_1_update_begin_0_values0_0, attention_1_update_begin_0_values1_0, query_pos1, attention_1_update_begin_0_values3_0))[name = string("attention_1_update_begin_0")]; + tensor attention_1_update_end_0_values0_0 = const()[name = string("attention_1_update_end_0_values0_0"), val = tensor([2])]; + tensor attention_1_update_end_0_values1_0 = const()[name = string("attention_1_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_1_update_end_0_values3_0 = const()[name = string("attention_1_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_1_update_end_0_axis_0 = const()[name = string("attention_1_update_end_0_axis_0"), val = int32(0)]; + bool attention_1_update_end_0_interleave_0 = const()[name = string("attention_1_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_1_update_end_0 = concat(axis = attention_1_update_end_0_axis_0, interleave = attention_1_update_end_0_interleave_0, values = (attention_1_update_end_0_values0_0, attention_1_update_end_0_values1_0, end_pos_0, attention_1_update_end_0_values3_0))[name = string("attention_1_update_end_0")]; + tensor attention_1_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_1_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_1_updated_key_cache_0 = slice_update(begin = attention_1_update_begin_0, end = attention_1_update_end_0, squeeze_mask = attention_1_updated_key_cache_0_squeeze_mask_0, update = attention_1_k_rope, x = coreml_update_state_0)[name = string("attention_1_updated_key_cache_0")]; + write_state(data = attention_1_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_50_write_state")]; + tensor coreml_update_state_2 = read_state(input = key_cache_state)[name = string("coreml_update_state_50")]; + tensor attention_1_key_cache_begin_0 = const()[name = string("attention_1_key_cache_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor attention_1_key_cache_end_0 = const()[name = string("attention_1_key_cache_end_0"), val = tensor([2, 2, 512, 64])]; + tensor attention_1_key_cache_squeeze_mask_0 = const()[name = string("attention_1_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_1_key_cache = slice_by_index(begin = attention_1_key_cache_begin_0, end = attention_1_key_cache_end_0, squeeze_mask = attention_1_key_cache_squeeze_mask_0, x = coreml_update_state_2)[name = string("attention_1_key_cache")]; + int32 attention_1_key_cache_head_axis_0 = const()[name = string("attention_1_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_1_key_cache_head_num_splits_0 = const()[name = string("attention_1_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_1_key_cache_head_0, tensor attention_1_key_cache_head_1 = split(axis = attention_1_key_cache_head_axis_0, num_splits = attention_1_key_cache_head_num_splits_0, x = attention_1_key_cache)[name = string("attention_1_key_cache_head")]; + tensor attention_1_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_1_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_1_updated_value_cache_0 = slice_update(begin = attention_1_update_begin_0, end = attention_1_update_end_0, squeeze_mask = attention_1_updated_value_cache_0_squeeze_mask_0, update = attention_1_split_qkv_heads_2, x = coreml_update_state_1)[name = string("attention_1_updated_value_cache_0")]; + write_state(data = attention_1_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_51_write_state")]; + tensor coreml_update_state_3 = read_state(input = value_cache_state)[name = string("coreml_update_state_51")]; + tensor attention_1_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_1_slice_current_layer_value_cache_begin_0"), val = tensor([1, 0, 0, 0])]; + tensor attention_1_slice_current_layer_value_cache_end_0 = const()[name = string("attention_1_slice_current_layer_value_cache_end_0"), val = tensor([2, 2, 512, 64])]; + tensor attention_1_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_1_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_1_slice_current_layer_value_cache = slice_by_index(begin = attention_1_slice_current_layer_value_cache_begin_0, end = attention_1_slice_current_layer_value_cache_end_0, squeeze_mask = attention_1_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_3)[name = string("attention_1_slice_current_layer_value_cache")]; + int32 attention_1_slice_value_cache_heads_axis_0 = const()[name = string("attention_1_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_1_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_1_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_1_slice_value_cache_heads_0, tensor attention_1_slice_value_cache_heads_1 = split(axis = attention_1_slice_value_cache_heads_axis_0, num_splits = attention_1_slice_value_cache_heads_num_splits_0, x = attention_1_slice_current_layer_value_cache)[name = string("attention_1_slice_value_cache_heads")]; + bool attention_1_scores_0_transpose_y_0 = const()[name = string("attention_1_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_1_scores_0_transpose_x_0 = const()[name = string("attention_1_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_1_scores_0 = matmul(transpose_x = attention_1_scores_0_transpose_x_0, transpose_y = attention_1_scores_0_transpose_y_0, x = attention_1_key_cache_head_0, y = attention_1_q_splits_0)[name = string("attention_1_scores_0")]; + fp16 attention_1_scaled_scores_0_y_0 = const()[name = string("attention_1_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_1_scaled_scores_0 = mul(x = attention_1_scores_0, y = attention_1_scaled_scores_0_y_0)[name = string("attention_1_scaled_scores_0")]; + tensor attention_1_masked_scaled_scores_0 = add(x = attention_1_scaled_scores_0, y = transpose_0)[name = string("attention_1_masked_scaled_scores_0")]; + int32 softmax_2_axis_0 = const()[name = string("softmax_2_axis_0"), val = int32(-2)]; + tensor softmax_2 = softmax(axis = softmax_2_axis_0, x = attention_1_masked_scaled_scores_0)[name = string("softmax_2")]; + bool attention_1_attention_0_transpose_x_0 = const()[name = string("attention_1_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_1_attention_0_transpose_y_0 = const()[name = string("attention_1_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_1_attention_0 = matmul(transpose_x = attention_1_attention_0_transpose_x_0, transpose_y = attention_1_attention_0_transpose_y_0, x = softmax_2, y = attention_1_slice_value_cache_heads_0)[name = string("attention_1_attention_0")]; + bool attention_1_scores_1_transpose_y_0 = const()[name = string("attention_1_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_1_scores_1_transpose_x_0 = const()[name = string("attention_1_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_1_scores_1 = matmul(transpose_x = attention_1_scores_1_transpose_x_0, transpose_y = attention_1_scores_1_transpose_y_0, x = attention_1_key_cache_head_1, y = attention_1_q_splits_1)[name = string("attention_1_scores_1")]; + fp16 attention_1_scaled_scores_1_y_0 = const()[name = string("attention_1_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_1_scaled_scores_1 = mul(x = attention_1_scores_1, y = attention_1_scaled_scores_1_y_0)[name = string("attention_1_scaled_scores_1")]; + tensor attention_1_masked_scaled_scores_1 = add(x = attention_1_scaled_scores_1, y = transpose_0)[name = string("attention_1_masked_scaled_scores_1")]; + int32 softmax_3_axis_0 = const()[name = string("softmax_3_axis_0"), val = int32(-2)]; + tensor softmax_3 = softmax(axis = softmax_3_axis_0, x = attention_1_masked_scaled_scores_1)[name = string("softmax_3")]; + bool attention_1_attention_1_transpose_x_0 = const()[name = string("attention_1_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_1_attention_1_transpose_y_0 = const()[name = string("attention_1_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_1_attention_1 = matmul(transpose_x = attention_1_attention_1_transpose_x_0, transpose_y = attention_1_attention_1_transpose_y_0, x = softmax_3, y = attention_1_slice_value_cache_heads_1)[name = string("attention_1_attention_1")]; + int32 attention_1_concat_attention_all_heads_axis_0 = const()[name = string("attention_1_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_1_concat_attention_all_heads_interleave_0 = const()[name = string("attention_1_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_1_concat_attention_all_heads = concat(axis = attention_1_concat_attention_all_heads_axis_0, interleave = attention_1_concat_attention_all_heads_interleave_0, values = (attention_1_attention_0, attention_1_attention_1))[name = string("attention_1_concat_attention_all_heads")]; + tensor attention_1_channels_first_retransposed_perm_0 = const()[name = string("attention_1_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_1_reshaped_shape_0 = const()[name = string("attention_1_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_1_channels_first_retransposed = transpose(perm = attention_1_channels_first_retransposed_perm_0, x = attention_1_concat_attention_all_heads)[name = string("transpose_45")]; + tensor attention_1_reshaped = reshape(shape = attention_1_reshaped_shape_0, x = attention_1_channels_first_retransposed)[name = string("attention_1_reshaped")]; + tensor attention_1_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303568000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304170176))))[name = string("attention_1_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_1 = constexpr_blockwise_shift_scale(data = attention_1_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304198912))))[name = string("constexpr_blockwise_shift_scale_1")]; + tensor attention_1_outproj_strides_0 = const()[name = string("attention_1_outproj_strides_0"), val = tensor([1])]; + string attention_1_outproj_pad_type_0 = const()[name = string("attention_1_outproj_pad_type_0"), val = string("valid")]; + tensor attention_1_outproj_pad_0 = const()[name = string("attention_1_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_1_outproj_dilations_0 = const()[name = string("attention_1_outproj_dilations_0"), val = tensor([1])]; + int32 attention_1_outproj_groups_0 = const()[name = string("attention_1_outproj_groups_0"), val = int32(1)]; + tensor attention_1_outproj = conv(dilations = attention_1_outproj_dilations_0, groups = attention_1_outproj_groups_0, pad = attention_1_outproj_pad_0, pad_type = attention_1_outproj_pad_type_0, strides = attention_1_outproj_strides_0, weight = constexpr_blockwise_shift_scale_1, x = attention_1_reshaped)[name = string("attention_1_outproj")]; + tensor block_1_residual_1 = add(x = block_0_residual_2, y = attention_1_outproj)[name = string("block_1_residual_1")]; + tensor block_1_ffn_rmsnorm_abs = abs(x = block_1_residual_1)[name = string("block_1_ffn_rmsnorm_abs")]; + tensor block_1_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_1_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_1_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_1_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_1_ffn_rmsnorm_maxval = reduce_max(axes = block_1_ffn_rmsnorm_maxval_axes_0, keep_dims = block_1_ffn_rmsnorm_maxval_keep_dims_0, x = block_1_ffn_rmsnorm_abs)[name = string("block_1_ffn_rmsnorm_maxval")]; + fp16 block_1_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_1_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_1_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_1_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_1_ffn_rmsnorm_maxval_clipped = clip(alpha = block_1_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_1_ffn_rmsnorm_maxval_clipped_beta_0, x = block_1_ffn_rmsnorm_maxval)[name = string("block_1_ffn_rmsnorm_maxval_clipped")]; + tensor block_1_ffn_rmsnorm_scaled = real_div(x = block_1_residual_1, y = block_1_ffn_rmsnorm_maxval_clipped)[name = string("block_1_ffn_rmsnorm_scaled")]; + tensor block_1_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_1_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_1_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_1_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_1_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_1_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_1_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_1_ffn_rmsnorm_scaled)[name = string("block_1_ffn_rmsnorm_squared_sum")]; + fp16 block_1_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_1_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_1_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_1_ffn_rmsnorm_rsqrt_epsilon_0, x = block_1_ffn_rmsnorm_squared_sum)[name = string("block_1_ffn_rmsnorm_rsqrt")]; + fp16 block_1_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_1_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_1_ffn_rmsnorm_dim_scaled = mul(x = block_1_ffn_rmsnorm_scaled, y = block_1_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_1_ffn_rmsnorm_dim_scaled")]; + tensor block_1_ffn_rmsnorm_normalized = mul(x = block_1_ffn_rmsnorm_dim_scaled, y = block_1_ffn_rmsnorm_rsqrt)[name = string("block_1_ffn_rmsnorm_normalized")]; + tensor block_1_ffn_rmsnorm_y_0 = const()[name = string("block_1_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304200768)))]; + tensor block_1_ffn_rmsnorm = mul(x = block_1_ffn_rmsnorm_normalized, y = block_1_ffn_rmsnorm_y_0)[name = string("block_1_ffn_rmsnorm")]; + tensor block_1_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304202624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307471296))))[name = string("block_1_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_2 = constexpr_blockwise_shift_scale(data = block_1_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307627008))))[name = string("constexpr_blockwise_shift_scale_2")]; + tensor block_1_ffn_inproj_strides_0 = const()[name = string("block_1_ffn_inproj_strides_0"), val = tensor([1])]; + string block_1_ffn_inproj_pad_type_0 = const()[name = string("block_1_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_1_ffn_inproj_pad_0 = const()[name = string("block_1_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_1_ffn_inproj_dilations_0 = const()[name = string("block_1_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_1_ffn_inproj_groups_0 = const()[name = string("block_1_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_1_ffn_inproj = conv(dilations = block_1_ffn_inproj_dilations_0, groups = block_1_ffn_inproj_groups_0, pad = block_1_ffn_inproj_pad_0, pad_type = block_1_ffn_inproj_pad_type_0, strides = block_1_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_2, x = block_1_ffn_rmsnorm)[name = string("block_1_ffn_inproj")]; + tensor block_1_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307636800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(310905472))))[name = string("block_1_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_3 = constexpr_blockwise_shift_scale(data = block_1_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311061184))))[name = string("constexpr_blockwise_shift_scale_3")]; + tensor block_1_ffn_g_strides_0 = const()[name = string("block_1_ffn_g_strides_0"), val = tensor([1])]; + string block_1_ffn_g_pad_type_0 = const()[name = string("block_1_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_1_ffn_g_pad_0 = const()[name = string("block_1_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_1_ffn_g_dilations_0 = const()[name = string("block_1_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_1_ffn_g_groups_0 = const()[name = string("block_1_ffn_g_groups_0"), val = int32(1)]; + tensor block_1_ffn_g = conv(dilations = block_1_ffn_g_dilations_0, groups = block_1_ffn_g_groups_0, pad = block_1_ffn_g_pad_0, pad_type = block_1_ffn_g_pad_type_0, strides = block_1_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_3, x = block_1_ffn_rmsnorm)[name = string("block_1_ffn_g")]; + tensor block_1_ffn_g_activation = silu(x = block_1_ffn_g)[name = string("block_1_ffn_g_activation")]; + tensor block_1_ffn_x_gated = mul(x = block_1_ffn_inproj, y = block_1_ffn_g_activation)[name = string("block_1_ffn_x_gated")]; + tensor block_1_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311070976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314339648))))[name = string("block_1_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_4 = constexpr_blockwise_shift_scale(data = block_1_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314368384))))[name = string("constexpr_blockwise_shift_scale_4")]; + tensor block_1_ffn_outproj_strides_0 = const()[name = string("block_1_ffn_outproj_strides_0"), val = tensor([1])]; + string block_1_ffn_outproj_pad_type_0 = const()[name = string("block_1_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_1_ffn_outproj_pad_0 = const()[name = string("block_1_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_1_ffn_outproj_dilations_0 = const()[name = string("block_1_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_1_ffn_outproj_groups_0 = const()[name = string("block_1_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_1_ffn_outproj = conv(dilations = block_1_ffn_outproj_dilations_0, groups = block_1_ffn_outproj_groups_0, pad = block_1_ffn_outproj_pad_0, pad_type = block_1_ffn_outproj_pad_type_0, strides = block_1_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_4, x = block_1_ffn_x_gated)[name = string("block_1_ffn_outproj")]; + tensor block_1_residual_2 = add(x = block_1_ffn_outproj, y = block_1_residual_1)[name = string("block_1_residual_2")]; + tensor block_2_attention_rmsnorm_abs = abs(x = block_1_residual_2)[name = string("block_2_attention_rmsnorm_abs")]; + tensor block_2_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_2_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_2_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_2_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_2_attention_rmsnorm_maxval = reduce_max(axes = block_2_attention_rmsnorm_maxval_axes_0, keep_dims = block_2_attention_rmsnorm_maxval_keep_dims_0, x = block_2_attention_rmsnorm_abs)[name = string("block_2_attention_rmsnorm_maxval")]; + fp16 block_2_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_2_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_2_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_2_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_2_attention_rmsnorm_maxval_clipped = clip(alpha = block_2_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_2_attention_rmsnorm_maxval_clipped_beta_0, x = block_2_attention_rmsnorm_maxval)[name = string("block_2_attention_rmsnorm_maxval_clipped")]; + tensor block_2_attention_rmsnorm_scaled = real_div(x = block_1_residual_2, y = block_2_attention_rmsnorm_maxval_clipped)[name = string("block_2_attention_rmsnorm_scaled")]; + tensor block_2_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_2_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_2_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_2_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_2_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_2_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_2_attention_rmsnorm_squared_sum_keep_dims_0, x = block_2_attention_rmsnorm_scaled)[name = string("block_2_attention_rmsnorm_squared_sum")]; + fp16 block_2_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_2_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_2_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_2_attention_rmsnorm_rsqrt_epsilon_0, x = block_2_attention_rmsnorm_squared_sum)[name = string("block_2_attention_rmsnorm_rsqrt")]; + fp16 block_2_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_2_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_2_attention_rmsnorm_dim_scaled = mul(x = block_2_attention_rmsnorm_scaled, y = block_2_attention_rmsnorm_dim_scaled_y_0)[name = string("block_2_attention_rmsnorm_dim_scaled")]; + tensor block_2_attention_rmsnorm_normalized = mul(x = block_2_attention_rmsnorm_dim_scaled, y = block_2_attention_rmsnorm_rsqrt)[name = string("block_2_attention_rmsnorm_normalized")]; + tensor block_2_attention_rmsnorm_y_0 = const()[name = string("block_2_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314370240)))]; + tensor block_2_attention_rmsnorm = mul(x = block_2_attention_rmsnorm_normalized, y = block_2_attention_rmsnorm_y_0)[name = string("block_2_attention_rmsnorm")]; + tensor attention_2_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314372096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315146304))))[name = string("attention_2_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_5 = constexpr_blockwise_shift_scale(data = attention_2_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315183232))))[name = string("constexpr_blockwise_shift_scale_5")]; + tensor attention_2_qkvproj_bias_0 = const()[name = string("attention_2_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315185600)))]; + tensor attention_2_qkvproj_strides_0 = const()[name = string("attention_2_qkvproj_strides_0"), val = tensor([1])]; + string attention_2_qkvproj_pad_type_0 = const()[name = string("attention_2_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_2_qkvproj_pad_0 = const()[name = string("attention_2_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_2_qkvproj_dilations_0 = const()[name = string("attention_2_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_2_qkvproj_groups_0 = const()[name = string("attention_2_qkvproj_groups_0"), val = int32(1)]; + tensor attention_2_qkvproj = conv(bias = attention_2_qkvproj_bias_0, dilations = attention_2_qkvproj_dilations_0, groups = attention_2_qkvproj_groups_0, pad = attention_2_qkvproj_pad_0, pad_type = attention_2_qkvproj_pad_type_0, strides = attention_2_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_5, x = block_2_attention_rmsnorm)[name = string("attention_2_qkvproj")]; + tensor attention_2_head_reshape_shape_0 = const()[name = string("attention_2_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_2_head_reshape = reshape(shape = attention_2_head_reshape_shape_0, x = attention_2_qkvproj)[name = string("attention_2_head_reshape")]; + tensor attention_2_head_transpose_perm_0 = const()[name = string("attention_2_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_2_split_qkv_heads_axis_0 = const()[name = string("attention_2_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_2_split_qkv_heads_split_sizes_0 = const()[name = string("attention_2_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_2_head_transpose = transpose(perm = attention_2_head_transpose_perm_0, x = attention_2_head_reshape)[name = string("transpose_44")]; + tensor attention_2_split_qkv_heads_0, tensor attention_2_split_qkv_heads_1, tensor attention_2_split_qkv_heads_2 = split(axis = attention_2_split_qkv_heads_axis_0, split_sizes = attention_2_split_qkv_heads_split_sizes_0, x = attention_2_head_transpose)[name = string("attention_2_split_qkv_heads")]; + tensor attention_2_q_rope_lhs_mult = mul(x = attention_2_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_2_q_rope_lhs_mult")]; + int32 attention_2_q_rotate_half_split_num_splits_0 = const()[name = string("attention_2_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_2_q_rotate_half_split_axis_0 = const()[name = string("attention_2_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_2_q_rotate_half_split_0, tensor attention_2_q_rotate_half_split_1 = split(axis = attention_2_q_rotate_half_split_axis_0, num_splits = attention_2_q_rotate_half_split_num_splits_0, x = attention_2_split_qkv_heads_0)[name = string("attention_2_q_rotate_half_split")]; + fp16 attention_2_q_rotate_half_neg_y_0 = const()[name = string("attention_2_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_2_q_rotate_half_neg = mul(x = attention_2_q_rotate_half_split_1, y = attention_2_q_rotate_half_neg_y_0)[name = string("attention_2_q_rotate_half_neg")]; + int32 attention_2_q_rotate_half_concat_axis_0 = const()[name = string("attention_2_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_2_q_rotate_half_concat_interleave_0 = const()[name = string("attention_2_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_2_q_rotate_half_concat = concat(axis = attention_2_q_rotate_half_concat_axis_0, interleave = attention_2_q_rotate_half_concat_interleave_0, values = (attention_2_q_rotate_half_neg, attention_2_q_rotate_half_split_0))[name = string("attention_2_q_rotate_half_concat")]; + tensor attention_2_q_rope_rhs_mult = mul(x = attention_2_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_2_q_rope_rhs_mult")]; + tensor attention_2_q_rope = add(x = attention_2_q_rope_lhs_mult, y = attention_2_q_rope_rhs_mult)[name = string("attention_2_q_rope")]; + tensor attention_2_k_rope_lhs_mult = mul(x = attention_2_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_2_k_rope_lhs_mult")]; + int32 attention_2_k_rotate_half_split_num_splits_0 = const()[name = string("attention_2_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_2_k_rotate_half_split_axis_0 = const()[name = string("attention_2_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_2_k_rotate_half_split_0, tensor attention_2_k_rotate_half_split_1 = split(axis = attention_2_k_rotate_half_split_axis_0, num_splits = attention_2_k_rotate_half_split_num_splits_0, x = attention_2_split_qkv_heads_1)[name = string("attention_2_k_rotate_half_split")]; + fp16 attention_2_k_rotate_half_neg_y_0 = const()[name = string("attention_2_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_2_k_rotate_half_neg = mul(x = attention_2_k_rotate_half_split_1, y = attention_2_k_rotate_half_neg_y_0)[name = string("attention_2_k_rotate_half_neg")]; + int32 attention_2_k_rotate_half_concat_axis_0 = const()[name = string("attention_2_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_2_k_rotate_half_concat_interleave_0 = const()[name = string("attention_2_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_2_k_rotate_half_concat = concat(axis = attention_2_k_rotate_half_concat_axis_0, interleave = attention_2_k_rotate_half_concat_interleave_0, values = (attention_2_k_rotate_half_neg, attention_2_k_rotate_half_split_0))[name = string("attention_2_k_rotate_half_concat")]; + tensor attention_2_k_rope_rhs_mult = mul(x = attention_2_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_2_k_rope_rhs_mult")]; + tensor attention_2_k_rope = add(x = attention_2_k_rope_lhs_mult, y = attention_2_k_rope_rhs_mult)[name = string("attention_2_k_rope")]; + int32 attention_2_q_splits_axis_0 = const()[name = string("attention_2_q_splits_axis_0"), val = int32(1)]; + int32 attention_2_q_splits_num_splits_0 = const()[name = string("attention_2_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_2_q_splits_0, tensor attention_2_q_splits_1 = split(axis = attention_2_q_splits_axis_0, num_splits = attention_2_q_splits_num_splits_0, x = attention_2_q_rope)[name = string("attention_2_q_splits")]; + tensor attention_2_update_begin_0_values0_0 = const()[name = string("attention_2_update_begin_0_values0_0"), val = tensor([2])]; + tensor attention_2_update_begin_0_values1_0 = const()[name = string("attention_2_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_2_update_begin_0_values3_0 = const()[name = string("attention_2_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_2_update_begin_0_axis_0 = const()[name = string("attention_2_update_begin_0_axis_0"), val = int32(0)]; + bool attention_2_update_begin_0_interleave_0 = const()[name = string("attention_2_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_2_update_begin_0 = concat(axis = attention_2_update_begin_0_axis_0, interleave = attention_2_update_begin_0_interleave_0, values = (attention_2_update_begin_0_values0_0, attention_2_update_begin_0_values1_0, query_pos1, attention_2_update_begin_0_values3_0))[name = string("attention_2_update_begin_0")]; + tensor attention_2_update_end_0_values0_0 = const()[name = string("attention_2_update_end_0_values0_0"), val = tensor([3])]; + tensor attention_2_update_end_0_values1_0 = const()[name = string("attention_2_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_2_update_end_0_values3_0 = const()[name = string("attention_2_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_2_update_end_0_axis_0 = const()[name = string("attention_2_update_end_0_axis_0"), val = int32(0)]; + bool attention_2_update_end_0_interleave_0 = const()[name = string("attention_2_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_2_update_end_0 = concat(axis = attention_2_update_end_0_axis_0, interleave = attention_2_update_end_0_interleave_0, values = (attention_2_update_end_0_values0_0, attention_2_update_end_0_values1_0, end_pos_0, attention_2_update_end_0_values3_0))[name = string("attention_2_update_end_0")]; + tensor attention_2_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_2_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_2_updated_key_cache_0 = slice_update(begin = attention_2_update_begin_0, end = attention_2_update_end_0, squeeze_mask = attention_2_updated_key_cache_0_squeeze_mask_0, update = attention_2_k_rope, x = coreml_update_state_2)[name = string("attention_2_updated_key_cache_0")]; + write_state(data = attention_2_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_52_write_state")]; + tensor coreml_update_state_4 = read_state(input = key_cache_state)[name = string("coreml_update_state_52")]; + tensor attention_2_key_cache_begin_0 = const()[name = string("attention_2_key_cache_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor attention_2_key_cache_end_0 = const()[name = string("attention_2_key_cache_end_0"), val = tensor([3, 2, 512, 64])]; + tensor attention_2_key_cache_squeeze_mask_0 = const()[name = string("attention_2_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_2_key_cache = slice_by_index(begin = attention_2_key_cache_begin_0, end = attention_2_key_cache_end_0, squeeze_mask = attention_2_key_cache_squeeze_mask_0, x = coreml_update_state_4)[name = string("attention_2_key_cache")]; + int32 attention_2_key_cache_head_axis_0 = const()[name = string("attention_2_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_2_key_cache_head_num_splits_0 = const()[name = string("attention_2_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_2_key_cache_head_0, tensor attention_2_key_cache_head_1 = split(axis = attention_2_key_cache_head_axis_0, num_splits = attention_2_key_cache_head_num_splits_0, x = attention_2_key_cache)[name = string("attention_2_key_cache_head")]; + tensor attention_2_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_2_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_2_updated_value_cache_0 = slice_update(begin = attention_2_update_begin_0, end = attention_2_update_end_0, squeeze_mask = attention_2_updated_value_cache_0_squeeze_mask_0, update = attention_2_split_qkv_heads_2, x = coreml_update_state_3)[name = string("attention_2_updated_value_cache_0")]; + write_state(data = attention_2_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_53_write_state")]; + tensor coreml_update_state_5 = read_state(input = value_cache_state)[name = string("coreml_update_state_53")]; + tensor attention_2_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_2_slice_current_layer_value_cache_begin_0"), val = tensor([2, 0, 0, 0])]; + tensor attention_2_slice_current_layer_value_cache_end_0 = const()[name = string("attention_2_slice_current_layer_value_cache_end_0"), val = tensor([3, 2, 512, 64])]; + tensor attention_2_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_2_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_2_slice_current_layer_value_cache = slice_by_index(begin = attention_2_slice_current_layer_value_cache_begin_0, end = attention_2_slice_current_layer_value_cache_end_0, squeeze_mask = attention_2_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_5)[name = string("attention_2_slice_current_layer_value_cache")]; + int32 attention_2_slice_value_cache_heads_axis_0 = const()[name = string("attention_2_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_2_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_2_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_2_slice_value_cache_heads_0, tensor attention_2_slice_value_cache_heads_1 = split(axis = attention_2_slice_value_cache_heads_axis_0, num_splits = attention_2_slice_value_cache_heads_num_splits_0, x = attention_2_slice_current_layer_value_cache)[name = string("attention_2_slice_value_cache_heads")]; + bool attention_2_scores_0_transpose_y_0 = const()[name = string("attention_2_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_2_scores_0_transpose_x_0 = const()[name = string("attention_2_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_2_scores_0 = matmul(transpose_x = attention_2_scores_0_transpose_x_0, transpose_y = attention_2_scores_0_transpose_y_0, x = attention_2_key_cache_head_0, y = attention_2_q_splits_0)[name = string("attention_2_scores_0")]; + fp16 attention_2_scaled_scores_0_y_0 = const()[name = string("attention_2_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_2_scaled_scores_0 = mul(x = attention_2_scores_0, y = attention_2_scaled_scores_0_y_0)[name = string("attention_2_scaled_scores_0")]; + tensor attention_2_masked_scaled_scores_0 = add(x = attention_2_scaled_scores_0, y = transpose_0)[name = string("attention_2_masked_scaled_scores_0")]; + int32 softmax_4_axis_0 = const()[name = string("softmax_4_axis_0"), val = int32(-2)]; + tensor softmax_4 = softmax(axis = softmax_4_axis_0, x = attention_2_masked_scaled_scores_0)[name = string("softmax_4")]; + bool attention_2_attention_0_transpose_x_0 = const()[name = string("attention_2_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_2_attention_0_transpose_y_0 = const()[name = string("attention_2_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_2_attention_0 = matmul(transpose_x = attention_2_attention_0_transpose_x_0, transpose_y = attention_2_attention_0_transpose_y_0, x = softmax_4, y = attention_2_slice_value_cache_heads_0)[name = string("attention_2_attention_0")]; + bool attention_2_scores_1_transpose_y_0 = const()[name = string("attention_2_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_2_scores_1_transpose_x_0 = const()[name = string("attention_2_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_2_scores_1 = matmul(transpose_x = attention_2_scores_1_transpose_x_0, transpose_y = attention_2_scores_1_transpose_y_0, x = attention_2_key_cache_head_1, y = attention_2_q_splits_1)[name = string("attention_2_scores_1")]; + fp16 attention_2_scaled_scores_1_y_0 = const()[name = string("attention_2_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_2_scaled_scores_1 = mul(x = attention_2_scores_1, y = attention_2_scaled_scores_1_y_0)[name = string("attention_2_scaled_scores_1")]; + tensor attention_2_masked_scaled_scores_1 = add(x = attention_2_scaled_scores_1, y = transpose_0)[name = string("attention_2_masked_scaled_scores_1")]; + int32 softmax_5_axis_0 = const()[name = string("softmax_5_axis_0"), val = int32(-2)]; + tensor softmax_5 = softmax(axis = softmax_5_axis_0, x = attention_2_masked_scaled_scores_1)[name = string("softmax_5")]; + bool attention_2_attention_1_transpose_x_0 = const()[name = string("attention_2_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_2_attention_1_transpose_y_0 = const()[name = string("attention_2_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_2_attention_1 = matmul(transpose_x = attention_2_attention_1_transpose_x_0, transpose_y = attention_2_attention_1_transpose_y_0, x = softmax_5, y = attention_2_slice_value_cache_heads_1)[name = string("attention_2_attention_1")]; + int32 attention_2_concat_attention_all_heads_axis_0 = const()[name = string("attention_2_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_2_concat_attention_all_heads_interleave_0 = const()[name = string("attention_2_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_2_concat_attention_all_heads = concat(axis = attention_2_concat_attention_all_heads_axis_0, interleave = attention_2_concat_attention_all_heads_interleave_0, values = (attention_2_attention_0, attention_2_attention_1))[name = string("attention_2_concat_attention_all_heads")]; + tensor attention_2_channels_first_retransposed_perm_0 = const()[name = string("attention_2_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_2_reshaped_shape_0 = const()[name = string("attention_2_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_2_channels_first_retransposed = transpose(perm = attention_2_channels_first_retransposed_perm_0, x = attention_2_concat_attention_all_heads)[name = string("transpose_43")]; + tensor attention_2_reshaped = reshape(shape = attention_2_reshaped_shape_0, x = attention_2_channels_first_retransposed)[name = string("attention_2_reshaped")]; + tensor attention_2_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315187968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315790144))))[name = string("attention_2_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_6 = constexpr_blockwise_shift_scale(data = attention_2_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315818880))))[name = string("constexpr_blockwise_shift_scale_6")]; + tensor attention_2_outproj_strides_0 = const()[name = string("attention_2_outproj_strides_0"), val = tensor([1])]; + string attention_2_outproj_pad_type_0 = const()[name = string("attention_2_outproj_pad_type_0"), val = string("valid")]; + tensor attention_2_outproj_pad_0 = const()[name = string("attention_2_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_2_outproj_dilations_0 = const()[name = string("attention_2_outproj_dilations_0"), val = tensor([1])]; + int32 attention_2_outproj_groups_0 = const()[name = string("attention_2_outproj_groups_0"), val = int32(1)]; + tensor attention_2_outproj = conv(dilations = attention_2_outproj_dilations_0, groups = attention_2_outproj_groups_0, pad = attention_2_outproj_pad_0, pad_type = attention_2_outproj_pad_type_0, strides = attention_2_outproj_strides_0, weight = constexpr_blockwise_shift_scale_6, x = attention_2_reshaped)[name = string("attention_2_outproj")]; + tensor block_2_residual_1 = add(x = block_1_residual_2, y = attention_2_outproj)[name = string("block_2_residual_1")]; + tensor block_2_ffn_rmsnorm_abs = abs(x = block_2_residual_1)[name = string("block_2_ffn_rmsnorm_abs")]; + tensor block_2_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_2_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_2_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_2_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_2_ffn_rmsnorm_maxval = reduce_max(axes = block_2_ffn_rmsnorm_maxval_axes_0, keep_dims = block_2_ffn_rmsnorm_maxval_keep_dims_0, x = block_2_ffn_rmsnorm_abs)[name = string("block_2_ffn_rmsnorm_maxval")]; + fp16 block_2_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_2_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_2_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_2_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_2_ffn_rmsnorm_maxval_clipped = clip(alpha = block_2_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_2_ffn_rmsnorm_maxval_clipped_beta_0, x = block_2_ffn_rmsnorm_maxval)[name = string("block_2_ffn_rmsnorm_maxval_clipped")]; + tensor block_2_ffn_rmsnorm_scaled = real_div(x = block_2_residual_1, y = block_2_ffn_rmsnorm_maxval_clipped)[name = string("block_2_ffn_rmsnorm_scaled")]; + tensor block_2_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_2_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_2_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_2_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_2_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_2_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_2_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_2_ffn_rmsnorm_scaled)[name = string("block_2_ffn_rmsnorm_squared_sum")]; + fp16 block_2_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_2_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_2_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_2_ffn_rmsnorm_rsqrt_epsilon_0, x = block_2_ffn_rmsnorm_squared_sum)[name = string("block_2_ffn_rmsnorm_rsqrt")]; + fp16 block_2_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_2_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_2_ffn_rmsnorm_dim_scaled = mul(x = block_2_ffn_rmsnorm_scaled, y = block_2_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_2_ffn_rmsnorm_dim_scaled")]; + tensor block_2_ffn_rmsnorm_normalized = mul(x = block_2_ffn_rmsnorm_dim_scaled, y = block_2_ffn_rmsnorm_rsqrt)[name = string("block_2_ffn_rmsnorm_normalized")]; + tensor block_2_ffn_rmsnorm_y_0 = const()[name = string("block_2_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315820736)))]; + tensor block_2_ffn_rmsnorm = mul(x = block_2_ffn_rmsnorm_normalized, y = block_2_ffn_rmsnorm_y_0)[name = string("block_2_ffn_rmsnorm")]; + tensor block_2_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315822592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319091264))))[name = string("block_2_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_7 = constexpr_blockwise_shift_scale(data = block_2_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319246976))))[name = string("constexpr_blockwise_shift_scale_7")]; + tensor block_2_ffn_inproj_strides_0 = const()[name = string("block_2_ffn_inproj_strides_0"), val = tensor([1])]; + string block_2_ffn_inproj_pad_type_0 = const()[name = string("block_2_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_2_ffn_inproj_pad_0 = const()[name = string("block_2_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_2_ffn_inproj_dilations_0 = const()[name = string("block_2_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_2_ffn_inproj_groups_0 = const()[name = string("block_2_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_2_ffn_inproj = conv(dilations = block_2_ffn_inproj_dilations_0, groups = block_2_ffn_inproj_groups_0, pad = block_2_ffn_inproj_pad_0, pad_type = block_2_ffn_inproj_pad_type_0, strides = block_2_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_7, x = block_2_ffn_rmsnorm)[name = string("block_2_ffn_inproj")]; + tensor block_2_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319256768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322525440))))[name = string("block_2_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_8 = constexpr_blockwise_shift_scale(data = block_2_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322681152))))[name = string("constexpr_blockwise_shift_scale_8")]; + tensor block_2_ffn_g_strides_0 = const()[name = string("block_2_ffn_g_strides_0"), val = tensor([1])]; + string block_2_ffn_g_pad_type_0 = const()[name = string("block_2_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_2_ffn_g_pad_0 = const()[name = string("block_2_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_2_ffn_g_dilations_0 = const()[name = string("block_2_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_2_ffn_g_groups_0 = const()[name = string("block_2_ffn_g_groups_0"), val = int32(1)]; + tensor block_2_ffn_g = conv(dilations = block_2_ffn_g_dilations_0, groups = block_2_ffn_g_groups_0, pad = block_2_ffn_g_pad_0, pad_type = block_2_ffn_g_pad_type_0, strides = block_2_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_8, x = block_2_ffn_rmsnorm)[name = string("block_2_ffn_g")]; + tensor block_2_ffn_g_activation = silu(x = block_2_ffn_g)[name = string("block_2_ffn_g_activation")]; + tensor block_2_ffn_x_gated = mul(x = block_2_ffn_inproj, y = block_2_ffn_g_activation)[name = string("block_2_ffn_x_gated")]; + tensor block_2_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322690944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325959616))))[name = string("block_2_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_9 = constexpr_blockwise_shift_scale(data = block_2_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325988352))))[name = string("constexpr_blockwise_shift_scale_9")]; + tensor block_2_ffn_outproj_strides_0 = const()[name = string("block_2_ffn_outproj_strides_0"), val = tensor([1])]; + string block_2_ffn_outproj_pad_type_0 = const()[name = string("block_2_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_2_ffn_outproj_pad_0 = const()[name = string("block_2_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_2_ffn_outproj_dilations_0 = const()[name = string("block_2_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_2_ffn_outproj_groups_0 = const()[name = string("block_2_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_2_ffn_outproj = conv(dilations = block_2_ffn_outproj_dilations_0, groups = block_2_ffn_outproj_groups_0, pad = block_2_ffn_outproj_pad_0, pad_type = block_2_ffn_outproj_pad_type_0, strides = block_2_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_9, x = block_2_ffn_x_gated)[name = string("block_2_ffn_outproj")]; + tensor block_2_residual_2 = add(x = block_2_ffn_outproj, y = block_2_residual_1)[name = string("block_2_residual_2")]; + tensor block_3_attention_rmsnorm_abs = abs(x = block_2_residual_2)[name = string("block_3_attention_rmsnorm_abs")]; + tensor block_3_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_3_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_3_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_3_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_3_attention_rmsnorm_maxval = reduce_max(axes = block_3_attention_rmsnorm_maxval_axes_0, keep_dims = block_3_attention_rmsnorm_maxval_keep_dims_0, x = block_3_attention_rmsnorm_abs)[name = string("block_3_attention_rmsnorm_maxval")]; + fp16 block_3_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_3_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_3_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_3_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_3_attention_rmsnorm_maxval_clipped = clip(alpha = block_3_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_3_attention_rmsnorm_maxval_clipped_beta_0, x = block_3_attention_rmsnorm_maxval)[name = string("block_3_attention_rmsnorm_maxval_clipped")]; + tensor block_3_attention_rmsnorm_scaled = real_div(x = block_2_residual_2, y = block_3_attention_rmsnorm_maxval_clipped)[name = string("block_3_attention_rmsnorm_scaled")]; + tensor block_3_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_3_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_3_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_3_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_3_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_3_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_3_attention_rmsnorm_squared_sum_keep_dims_0, x = block_3_attention_rmsnorm_scaled)[name = string("block_3_attention_rmsnorm_squared_sum")]; + fp16 block_3_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_3_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_3_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_3_attention_rmsnorm_rsqrt_epsilon_0, x = block_3_attention_rmsnorm_squared_sum)[name = string("block_3_attention_rmsnorm_rsqrt")]; + fp16 block_3_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_3_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_3_attention_rmsnorm_dim_scaled = mul(x = block_3_attention_rmsnorm_scaled, y = block_3_attention_rmsnorm_dim_scaled_y_0)[name = string("block_3_attention_rmsnorm_dim_scaled")]; + tensor block_3_attention_rmsnorm_normalized = mul(x = block_3_attention_rmsnorm_dim_scaled, y = block_3_attention_rmsnorm_rsqrt)[name = string("block_3_attention_rmsnorm_normalized")]; + tensor block_3_attention_rmsnorm_y_0 = const()[name = string("block_3_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325990208)))]; + tensor block_3_attention_rmsnorm = mul(x = block_3_attention_rmsnorm_normalized, y = block_3_attention_rmsnorm_y_0)[name = string("block_3_attention_rmsnorm")]; + tensor attention_3_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325992064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326766272))))[name = string("attention_3_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_10 = constexpr_blockwise_shift_scale(data = attention_3_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326803200))))[name = string("constexpr_blockwise_shift_scale_10")]; + tensor attention_3_qkvproj_bias_0 = const()[name = string("attention_3_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326805568)))]; + tensor attention_3_qkvproj_strides_0 = const()[name = string("attention_3_qkvproj_strides_0"), val = tensor([1])]; + string attention_3_qkvproj_pad_type_0 = const()[name = string("attention_3_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_3_qkvproj_pad_0 = const()[name = string("attention_3_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_3_qkvproj_dilations_0 = const()[name = string("attention_3_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_3_qkvproj_groups_0 = const()[name = string("attention_3_qkvproj_groups_0"), val = int32(1)]; + tensor attention_3_qkvproj = conv(bias = attention_3_qkvproj_bias_0, dilations = attention_3_qkvproj_dilations_0, groups = attention_3_qkvproj_groups_0, pad = attention_3_qkvproj_pad_0, pad_type = attention_3_qkvproj_pad_type_0, strides = attention_3_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_10, x = block_3_attention_rmsnorm)[name = string("attention_3_qkvproj")]; + tensor attention_3_head_reshape_shape_0 = const()[name = string("attention_3_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_3_head_reshape = reshape(shape = attention_3_head_reshape_shape_0, x = attention_3_qkvproj)[name = string("attention_3_head_reshape")]; + tensor attention_3_head_transpose_perm_0 = const()[name = string("attention_3_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_3_split_qkv_heads_axis_0 = const()[name = string("attention_3_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_3_split_qkv_heads_split_sizes_0 = const()[name = string("attention_3_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_3_head_transpose = transpose(perm = attention_3_head_transpose_perm_0, x = attention_3_head_reshape)[name = string("transpose_42")]; + tensor attention_3_split_qkv_heads_0, tensor attention_3_split_qkv_heads_1, tensor attention_3_split_qkv_heads_2 = split(axis = attention_3_split_qkv_heads_axis_0, split_sizes = attention_3_split_qkv_heads_split_sizes_0, x = attention_3_head_transpose)[name = string("attention_3_split_qkv_heads")]; + tensor attention_3_q_rope_lhs_mult = mul(x = attention_3_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_3_q_rope_lhs_mult")]; + int32 attention_3_q_rotate_half_split_num_splits_0 = const()[name = string("attention_3_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_3_q_rotate_half_split_axis_0 = const()[name = string("attention_3_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_3_q_rotate_half_split_0, tensor attention_3_q_rotate_half_split_1 = split(axis = attention_3_q_rotate_half_split_axis_0, num_splits = attention_3_q_rotate_half_split_num_splits_0, x = attention_3_split_qkv_heads_0)[name = string("attention_3_q_rotate_half_split")]; + fp16 attention_3_q_rotate_half_neg_y_0 = const()[name = string("attention_3_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_3_q_rotate_half_neg = mul(x = attention_3_q_rotate_half_split_1, y = attention_3_q_rotate_half_neg_y_0)[name = string("attention_3_q_rotate_half_neg")]; + int32 attention_3_q_rotate_half_concat_axis_0 = const()[name = string("attention_3_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_3_q_rotate_half_concat_interleave_0 = const()[name = string("attention_3_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_3_q_rotate_half_concat = concat(axis = attention_3_q_rotate_half_concat_axis_0, interleave = attention_3_q_rotate_half_concat_interleave_0, values = (attention_3_q_rotate_half_neg, attention_3_q_rotate_half_split_0))[name = string("attention_3_q_rotate_half_concat")]; + tensor attention_3_q_rope_rhs_mult = mul(x = attention_3_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_3_q_rope_rhs_mult")]; + tensor attention_3_q_rope = add(x = attention_3_q_rope_lhs_mult, y = attention_3_q_rope_rhs_mult)[name = string("attention_3_q_rope")]; + tensor attention_3_k_rope_lhs_mult = mul(x = attention_3_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_3_k_rope_lhs_mult")]; + int32 attention_3_k_rotate_half_split_num_splits_0 = const()[name = string("attention_3_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_3_k_rotate_half_split_axis_0 = const()[name = string("attention_3_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_3_k_rotate_half_split_0, tensor attention_3_k_rotate_half_split_1 = split(axis = attention_3_k_rotate_half_split_axis_0, num_splits = attention_3_k_rotate_half_split_num_splits_0, x = attention_3_split_qkv_heads_1)[name = string("attention_3_k_rotate_half_split")]; + fp16 attention_3_k_rotate_half_neg_y_0 = const()[name = string("attention_3_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_3_k_rotate_half_neg = mul(x = attention_3_k_rotate_half_split_1, y = attention_3_k_rotate_half_neg_y_0)[name = string("attention_3_k_rotate_half_neg")]; + int32 attention_3_k_rotate_half_concat_axis_0 = const()[name = string("attention_3_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_3_k_rotate_half_concat_interleave_0 = const()[name = string("attention_3_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_3_k_rotate_half_concat = concat(axis = attention_3_k_rotate_half_concat_axis_0, interleave = attention_3_k_rotate_half_concat_interleave_0, values = (attention_3_k_rotate_half_neg, attention_3_k_rotate_half_split_0))[name = string("attention_3_k_rotate_half_concat")]; + tensor attention_3_k_rope_rhs_mult = mul(x = attention_3_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_3_k_rope_rhs_mult")]; + tensor attention_3_k_rope = add(x = attention_3_k_rope_lhs_mult, y = attention_3_k_rope_rhs_mult)[name = string("attention_3_k_rope")]; + int32 attention_3_q_splits_axis_0 = const()[name = string("attention_3_q_splits_axis_0"), val = int32(1)]; + int32 attention_3_q_splits_num_splits_0 = const()[name = string("attention_3_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_3_q_splits_0, tensor attention_3_q_splits_1 = split(axis = attention_3_q_splits_axis_0, num_splits = attention_3_q_splits_num_splits_0, x = attention_3_q_rope)[name = string("attention_3_q_splits")]; + tensor attention_3_update_begin_0_values0_0 = const()[name = string("attention_3_update_begin_0_values0_0"), val = tensor([3])]; + tensor attention_3_update_begin_0_values1_0 = const()[name = string("attention_3_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_3_update_begin_0_values3_0 = const()[name = string("attention_3_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_3_update_begin_0_axis_0 = const()[name = string("attention_3_update_begin_0_axis_0"), val = int32(0)]; + bool attention_3_update_begin_0_interleave_0 = const()[name = string("attention_3_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_3_update_begin_0 = concat(axis = attention_3_update_begin_0_axis_0, interleave = attention_3_update_begin_0_interleave_0, values = (attention_3_update_begin_0_values0_0, attention_3_update_begin_0_values1_0, query_pos1, attention_3_update_begin_0_values3_0))[name = string("attention_3_update_begin_0")]; + tensor attention_3_update_end_0_values0_0 = const()[name = string("attention_3_update_end_0_values0_0"), val = tensor([4])]; + tensor attention_3_update_end_0_values1_0 = const()[name = string("attention_3_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_3_update_end_0_values3_0 = const()[name = string("attention_3_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_3_update_end_0_axis_0 = const()[name = string("attention_3_update_end_0_axis_0"), val = int32(0)]; + bool attention_3_update_end_0_interleave_0 = const()[name = string("attention_3_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_3_update_end_0 = concat(axis = attention_3_update_end_0_axis_0, interleave = attention_3_update_end_0_interleave_0, values = (attention_3_update_end_0_values0_0, attention_3_update_end_0_values1_0, end_pos_0, attention_3_update_end_0_values3_0))[name = string("attention_3_update_end_0")]; + tensor attention_3_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_3_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_3_updated_key_cache_0 = slice_update(begin = attention_3_update_begin_0, end = attention_3_update_end_0, squeeze_mask = attention_3_updated_key_cache_0_squeeze_mask_0, update = attention_3_k_rope, x = coreml_update_state_4)[name = string("attention_3_updated_key_cache_0")]; + write_state(data = attention_3_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_54_write_state")]; + tensor coreml_update_state_6 = read_state(input = key_cache_state)[name = string("coreml_update_state_54")]; + tensor attention_3_key_cache_begin_0 = const()[name = string("attention_3_key_cache_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor attention_3_key_cache_end_0 = const()[name = string("attention_3_key_cache_end_0"), val = tensor([4, 2, 512, 64])]; + tensor attention_3_key_cache_squeeze_mask_0 = const()[name = string("attention_3_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_3_key_cache = slice_by_index(begin = attention_3_key_cache_begin_0, end = attention_3_key_cache_end_0, squeeze_mask = attention_3_key_cache_squeeze_mask_0, x = coreml_update_state_6)[name = string("attention_3_key_cache")]; + int32 attention_3_key_cache_head_axis_0 = const()[name = string("attention_3_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_3_key_cache_head_num_splits_0 = const()[name = string("attention_3_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_3_key_cache_head_0, tensor attention_3_key_cache_head_1 = split(axis = attention_3_key_cache_head_axis_0, num_splits = attention_3_key_cache_head_num_splits_0, x = attention_3_key_cache)[name = string("attention_3_key_cache_head")]; + tensor attention_3_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_3_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_3_updated_value_cache_0 = slice_update(begin = attention_3_update_begin_0, end = attention_3_update_end_0, squeeze_mask = attention_3_updated_value_cache_0_squeeze_mask_0, update = attention_3_split_qkv_heads_2, x = coreml_update_state_5)[name = string("attention_3_updated_value_cache_0")]; + write_state(data = attention_3_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_55_write_state")]; + tensor coreml_update_state_7 = read_state(input = value_cache_state)[name = string("coreml_update_state_55")]; + tensor attention_3_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_3_slice_current_layer_value_cache_begin_0"), val = tensor([3, 0, 0, 0])]; + tensor attention_3_slice_current_layer_value_cache_end_0 = const()[name = string("attention_3_slice_current_layer_value_cache_end_0"), val = tensor([4, 2, 512, 64])]; + tensor attention_3_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_3_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_3_slice_current_layer_value_cache = slice_by_index(begin = attention_3_slice_current_layer_value_cache_begin_0, end = attention_3_slice_current_layer_value_cache_end_0, squeeze_mask = attention_3_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_7)[name = string("attention_3_slice_current_layer_value_cache")]; + int32 attention_3_slice_value_cache_heads_axis_0 = const()[name = string("attention_3_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_3_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_3_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_3_slice_value_cache_heads_0, tensor attention_3_slice_value_cache_heads_1 = split(axis = attention_3_slice_value_cache_heads_axis_0, num_splits = attention_3_slice_value_cache_heads_num_splits_0, x = attention_3_slice_current_layer_value_cache)[name = string("attention_3_slice_value_cache_heads")]; + bool attention_3_scores_0_transpose_y_0 = const()[name = string("attention_3_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_3_scores_0_transpose_x_0 = const()[name = string("attention_3_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_3_scores_0 = matmul(transpose_x = attention_3_scores_0_transpose_x_0, transpose_y = attention_3_scores_0_transpose_y_0, x = attention_3_key_cache_head_0, y = attention_3_q_splits_0)[name = string("attention_3_scores_0")]; + fp16 attention_3_scaled_scores_0_y_0 = const()[name = string("attention_3_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_3_scaled_scores_0 = mul(x = attention_3_scores_0, y = attention_3_scaled_scores_0_y_0)[name = string("attention_3_scaled_scores_0")]; + tensor attention_3_masked_scaled_scores_0 = add(x = attention_3_scaled_scores_0, y = transpose_0)[name = string("attention_3_masked_scaled_scores_0")]; + int32 softmax_6_axis_0 = const()[name = string("softmax_6_axis_0"), val = int32(-2)]; + tensor softmax_6 = softmax(axis = softmax_6_axis_0, x = attention_3_masked_scaled_scores_0)[name = string("softmax_6")]; + bool attention_3_attention_0_transpose_x_0 = const()[name = string("attention_3_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_3_attention_0_transpose_y_0 = const()[name = string("attention_3_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_3_attention_0 = matmul(transpose_x = attention_3_attention_0_transpose_x_0, transpose_y = attention_3_attention_0_transpose_y_0, x = softmax_6, y = attention_3_slice_value_cache_heads_0)[name = string("attention_3_attention_0")]; + bool attention_3_scores_1_transpose_y_0 = const()[name = string("attention_3_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_3_scores_1_transpose_x_0 = const()[name = string("attention_3_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_3_scores_1 = matmul(transpose_x = attention_3_scores_1_transpose_x_0, transpose_y = attention_3_scores_1_transpose_y_0, x = attention_3_key_cache_head_1, y = attention_3_q_splits_1)[name = string("attention_3_scores_1")]; + fp16 attention_3_scaled_scores_1_y_0 = const()[name = string("attention_3_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_3_scaled_scores_1 = mul(x = attention_3_scores_1, y = attention_3_scaled_scores_1_y_0)[name = string("attention_3_scaled_scores_1")]; + tensor attention_3_masked_scaled_scores_1 = add(x = attention_3_scaled_scores_1, y = transpose_0)[name = string("attention_3_masked_scaled_scores_1")]; + int32 softmax_7_axis_0 = const()[name = string("softmax_7_axis_0"), val = int32(-2)]; + tensor softmax_7 = softmax(axis = softmax_7_axis_0, x = attention_3_masked_scaled_scores_1)[name = string("softmax_7")]; + bool attention_3_attention_1_transpose_x_0 = const()[name = string("attention_3_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_3_attention_1_transpose_y_0 = const()[name = string("attention_3_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_3_attention_1 = matmul(transpose_x = attention_3_attention_1_transpose_x_0, transpose_y = attention_3_attention_1_transpose_y_0, x = softmax_7, y = attention_3_slice_value_cache_heads_1)[name = string("attention_3_attention_1")]; + int32 attention_3_concat_attention_all_heads_axis_0 = const()[name = string("attention_3_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_3_concat_attention_all_heads_interleave_0 = const()[name = string("attention_3_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_3_concat_attention_all_heads = concat(axis = attention_3_concat_attention_all_heads_axis_0, interleave = attention_3_concat_attention_all_heads_interleave_0, values = (attention_3_attention_0, attention_3_attention_1))[name = string("attention_3_concat_attention_all_heads")]; + tensor attention_3_channels_first_retransposed_perm_0 = const()[name = string("attention_3_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_3_reshaped_shape_0 = const()[name = string("attention_3_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_3_channels_first_retransposed = transpose(perm = attention_3_channels_first_retransposed_perm_0, x = attention_3_concat_attention_all_heads)[name = string("transpose_41")]; + tensor attention_3_reshaped = reshape(shape = attention_3_reshaped_shape_0, x = attention_3_channels_first_retransposed)[name = string("attention_3_reshaped")]; + tensor attention_3_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326807936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327410112))))[name = string("attention_3_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_11 = constexpr_blockwise_shift_scale(data = attention_3_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327438848))))[name = string("constexpr_blockwise_shift_scale_11")]; + tensor attention_3_outproj_strides_0 = const()[name = string("attention_3_outproj_strides_0"), val = tensor([1])]; + string attention_3_outproj_pad_type_0 = const()[name = string("attention_3_outproj_pad_type_0"), val = string("valid")]; + tensor attention_3_outproj_pad_0 = const()[name = string("attention_3_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_3_outproj_dilations_0 = const()[name = string("attention_3_outproj_dilations_0"), val = tensor([1])]; + int32 attention_3_outproj_groups_0 = const()[name = string("attention_3_outproj_groups_0"), val = int32(1)]; + tensor attention_3_outproj = conv(dilations = attention_3_outproj_dilations_0, groups = attention_3_outproj_groups_0, pad = attention_3_outproj_pad_0, pad_type = attention_3_outproj_pad_type_0, strides = attention_3_outproj_strides_0, weight = constexpr_blockwise_shift_scale_11, x = attention_3_reshaped)[name = string("attention_3_outproj")]; + tensor block_3_residual_1 = add(x = block_2_residual_2, y = attention_3_outproj)[name = string("block_3_residual_1")]; + tensor block_3_ffn_rmsnorm_abs = abs(x = block_3_residual_1)[name = string("block_3_ffn_rmsnorm_abs")]; + tensor block_3_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_3_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_3_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_3_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_3_ffn_rmsnorm_maxval = reduce_max(axes = block_3_ffn_rmsnorm_maxval_axes_0, keep_dims = block_3_ffn_rmsnorm_maxval_keep_dims_0, x = block_3_ffn_rmsnorm_abs)[name = string("block_3_ffn_rmsnorm_maxval")]; + fp16 block_3_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_3_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_3_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_3_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_3_ffn_rmsnorm_maxval_clipped = clip(alpha = block_3_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_3_ffn_rmsnorm_maxval_clipped_beta_0, x = block_3_ffn_rmsnorm_maxval)[name = string("block_3_ffn_rmsnorm_maxval_clipped")]; + tensor block_3_ffn_rmsnorm_scaled = real_div(x = block_3_residual_1, y = block_3_ffn_rmsnorm_maxval_clipped)[name = string("block_3_ffn_rmsnorm_scaled")]; + tensor block_3_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_3_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_3_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_3_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_3_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_3_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_3_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_3_ffn_rmsnorm_scaled)[name = string("block_3_ffn_rmsnorm_squared_sum")]; + fp16 block_3_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_3_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_3_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_3_ffn_rmsnorm_rsqrt_epsilon_0, x = block_3_ffn_rmsnorm_squared_sum)[name = string("block_3_ffn_rmsnorm_rsqrt")]; + fp16 block_3_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_3_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_3_ffn_rmsnorm_dim_scaled = mul(x = block_3_ffn_rmsnorm_scaled, y = block_3_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_3_ffn_rmsnorm_dim_scaled")]; + tensor block_3_ffn_rmsnorm_normalized = mul(x = block_3_ffn_rmsnorm_dim_scaled, y = block_3_ffn_rmsnorm_rsqrt)[name = string("block_3_ffn_rmsnorm_normalized")]; + tensor block_3_ffn_rmsnorm_y_0 = const()[name = string("block_3_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327440704)))]; + tensor block_3_ffn_rmsnorm = mul(x = block_3_ffn_rmsnorm_normalized, y = block_3_ffn_rmsnorm_y_0)[name = string("block_3_ffn_rmsnorm")]; + tensor block_3_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327442560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330711232))))[name = string("block_3_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_12 = constexpr_blockwise_shift_scale(data = block_3_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330866944))))[name = string("constexpr_blockwise_shift_scale_12")]; + tensor block_3_ffn_inproj_strides_0 = const()[name = string("block_3_ffn_inproj_strides_0"), val = tensor([1])]; + string block_3_ffn_inproj_pad_type_0 = const()[name = string("block_3_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_3_ffn_inproj_pad_0 = const()[name = string("block_3_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_3_ffn_inproj_dilations_0 = const()[name = string("block_3_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_3_ffn_inproj_groups_0 = const()[name = string("block_3_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_3_ffn_inproj = conv(dilations = block_3_ffn_inproj_dilations_0, groups = block_3_ffn_inproj_groups_0, pad = block_3_ffn_inproj_pad_0, pad_type = block_3_ffn_inproj_pad_type_0, strides = block_3_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_12, x = block_3_ffn_rmsnorm)[name = string("block_3_ffn_inproj")]; + tensor block_3_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330876736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334145408))))[name = string("block_3_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_13 = constexpr_blockwise_shift_scale(data = block_3_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334301120))))[name = string("constexpr_blockwise_shift_scale_13")]; + tensor block_3_ffn_g_strides_0 = const()[name = string("block_3_ffn_g_strides_0"), val = tensor([1])]; + string block_3_ffn_g_pad_type_0 = const()[name = string("block_3_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_3_ffn_g_pad_0 = const()[name = string("block_3_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_3_ffn_g_dilations_0 = const()[name = string("block_3_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_3_ffn_g_groups_0 = const()[name = string("block_3_ffn_g_groups_0"), val = int32(1)]; + tensor block_3_ffn_g = conv(dilations = block_3_ffn_g_dilations_0, groups = block_3_ffn_g_groups_0, pad = block_3_ffn_g_pad_0, pad_type = block_3_ffn_g_pad_type_0, strides = block_3_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_13, x = block_3_ffn_rmsnorm)[name = string("block_3_ffn_g")]; + tensor block_3_ffn_g_activation = silu(x = block_3_ffn_g)[name = string("block_3_ffn_g_activation")]; + tensor block_3_ffn_x_gated = mul(x = block_3_ffn_inproj, y = block_3_ffn_g_activation)[name = string("block_3_ffn_x_gated")]; + tensor block_3_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334310912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337579584))))[name = string("block_3_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_14 = constexpr_blockwise_shift_scale(data = block_3_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337608320))))[name = string("constexpr_blockwise_shift_scale_14")]; + tensor block_3_ffn_outproj_strides_0 = const()[name = string("block_3_ffn_outproj_strides_0"), val = tensor([1])]; + string block_3_ffn_outproj_pad_type_0 = const()[name = string("block_3_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_3_ffn_outproj_pad_0 = const()[name = string("block_3_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_3_ffn_outproj_dilations_0 = const()[name = string("block_3_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_3_ffn_outproj_groups_0 = const()[name = string("block_3_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_3_ffn_outproj = conv(dilations = block_3_ffn_outproj_dilations_0, groups = block_3_ffn_outproj_groups_0, pad = block_3_ffn_outproj_pad_0, pad_type = block_3_ffn_outproj_pad_type_0, strides = block_3_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_14, x = block_3_ffn_x_gated)[name = string("block_3_ffn_outproj")]; + tensor block_3_residual_2 = add(x = block_3_ffn_outproj, y = block_3_residual_1)[name = string("block_3_residual_2")]; + tensor block_4_attention_rmsnorm_abs = abs(x = block_3_residual_2)[name = string("block_4_attention_rmsnorm_abs")]; + tensor block_4_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_4_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_4_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_4_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_4_attention_rmsnorm_maxval = reduce_max(axes = block_4_attention_rmsnorm_maxval_axes_0, keep_dims = block_4_attention_rmsnorm_maxval_keep_dims_0, x = block_4_attention_rmsnorm_abs)[name = string("block_4_attention_rmsnorm_maxval")]; + fp16 block_4_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_4_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_4_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_4_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_4_attention_rmsnorm_maxval_clipped = clip(alpha = block_4_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_4_attention_rmsnorm_maxval_clipped_beta_0, x = block_4_attention_rmsnorm_maxval)[name = string("block_4_attention_rmsnorm_maxval_clipped")]; + tensor block_4_attention_rmsnorm_scaled = real_div(x = block_3_residual_2, y = block_4_attention_rmsnorm_maxval_clipped)[name = string("block_4_attention_rmsnorm_scaled")]; + tensor block_4_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_4_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_4_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_4_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_4_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_4_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_4_attention_rmsnorm_squared_sum_keep_dims_0, x = block_4_attention_rmsnorm_scaled)[name = string("block_4_attention_rmsnorm_squared_sum")]; + fp16 block_4_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_4_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_4_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_4_attention_rmsnorm_rsqrt_epsilon_0, x = block_4_attention_rmsnorm_squared_sum)[name = string("block_4_attention_rmsnorm_rsqrt")]; + fp16 block_4_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_4_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_4_attention_rmsnorm_dim_scaled = mul(x = block_4_attention_rmsnorm_scaled, y = block_4_attention_rmsnorm_dim_scaled_y_0)[name = string("block_4_attention_rmsnorm_dim_scaled")]; + tensor block_4_attention_rmsnorm_normalized = mul(x = block_4_attention_rmsnorm_dim_scaled, y = block_4_attention_rmsnorm_rsqrt)[name = string("block_4_attention_rmsnorm_normalized")]; + tensor block_4_attention_rmsnorm_y_0 = const()[name = string("block_4_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337610176)))]; + tensor block_4_attention_rmsnorm = mul(x = block_4_attention_rmsnorm_normalized, y = block_4_attention_rmsnorm_y_0)[name = string("block_4_attention_rmsnorm")]; + tensor attention_4_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337612032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338386240))))[name = string("attention_4_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_15 = constexpr_blockwise_shift_scale(data = attention_4_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338423168))))[name = string("constexpr_blockwise_shift_scale_15")]; + tensor attention_4_qkvproj_bias_0 = const()[name = string("attention_4_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338425536)))]; + tensor attention_4_qkvproj_strides_0 = const()[name = string("attention_4_qkvproj_strides_0"), val = tensor([1])]; + string attention_4_qkvproj_pad_type_0 = const()[name = string("attention_4_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_4_qkvproj_pad_0 = const()[name = string("attention_4_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_4_qkvproj_dilations_0 = const()[name = string("attention_4_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_4_qkvproj_groups_0 = const()[name = string("attention_4_qkvproj_groups_0"), val = int32(1)]; + tensor attention_4_qkvproj = conv(bias = attention_4_qkvproj_bias_0, dilations = attention_4_qkvproj_dilations_0, groups = attention_4_qkvproj_groups_0, pad = attention_4_qkvproj_pad_0, pad_type = attention_4_qkvproj_pad_type_0, strides = attention_4_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_15, x = block_4_attention_rmsnorm)[name = string("attention_4_qkvproj")]; + tensor attention_4_head_reshape_shape_0 = const()[name = string("attention_4_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_4_head_reshape = reshape(shape = attention_4_head_reshape_shape_0, x = attention_4_qkvproj)[name = string("attention_4_head_reshape")]; + tensor attention_4_head_transpose_perm_0 = const()[name = string("attention_4_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_4_split_qkv_heads_axis_0 = const()[name = string("attention_4_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_4_split_qkv_heads_split_sizes_0 = const()[name = string("attention_4_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_4_head_transpose = transpose(perm = attention_4_head_transpose_perm_0, x = attention_4_head_reshape)[name = string("transpose_40")]; + tensor attention_4_split_qkv_heads_0, tensor attention_4_split_qkv_heads_1, tensor attention_4_split_qkv_heads_2 = split(axis = attention_4_split_qkv_heads_axis_0, split_sizes = attention_4_split_qkv_heads_split_sizes_0, x = attention_4_head_transpose)[name = string("attention_4_split_qkv_heads")]; + tensor attention_4_q_rope_lhs_mult = mul(x = attention_4_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_4_q_rope_lhs_mult")]; + int32 attention_4_q_rotate_half_split_num_splits_0 = const()[name = string("attention_4_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_4_q_rotate_half_split_axis_0 = const()[name = string("attention_4_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_4_q_rotate_half_split_0, tensor attention_4_q_rotate_half_split_1 = split(axis = attention_4_q_rotate_half_split_axis_0, num_splits = attention_4_q_rotate_half_split_num_splits_0, x = attention_4_split_qkv_heads_0)[name = string("attention_4_q_rotate_half_split")]; + fp16 attention_4_q_rotate_half_neg_y_0 = const()[name = string("attention_4_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_4_q_rotate_half_neg = mul(x = attention_4_q_rotate_half_split_1, y = attention_4_q_rotate_half_neg_y_0)[name = string("attention_4_q_rotate_half_neg")]; + int32 attention_4_q_rotate_half_concat_axis_0 = const()[name = string("attention_4_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_4_q_rotate_half_concat_interleave_0 = const()[name = string("attention_4_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_4_q_rotate_half_concat = concat(axis = attention_4_q_rotate_half_concat_axis_0, interleave = attention_4_q_rotate_half_concat_interleave_0, values = (attention_4_q_rotate_half_neg, attention_4_q_rotate_half_split_0))[name = string("attention_4_q_rotate_half_concat")]; + tensor attention_4_q_rope_rhs_mult = mul(x = attention_4_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_4_q_rope_rhs_mult")]; + tensor attention_4_q_rope = add(x = attention_4_q_rope_lhs_mult, y = attention_4_q_rope_rhs_mult)[name = string("attention_4_q_rope")]; + tensor attention_4_k_rope_lhs_mult = mul(x = attention_4_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_4_k_rope_lhs_mult")]; + int32 attention_4_k_rotate_half_split_num_splits_0 = const()[name = string("attention_4_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_4_k_rotate_half_split_axis_0 = const()[name = string("attention_4_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_4_k_rotate_half_split_0, tensor attention_4_k_rotate_half_split_1 = split(axis = attention_4_k_rotate_half_split_axis_0, num_splits = attention_4_k_rotate_half_split_num_splits_0, x = attention_4_split_qkv_heads_1)[name = string("attention_4_k_rotate_half_split")]; + fp16 attention_4_k_rotate_half_neg_y_0 = const()[name = string("attention_4_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_4_k_rotate_half_neg = mul(x = attention_4_k_rotate_half_split_1, y = attention_4_k_rotate_half_neg_y_0)[name = string("attention_4_k_rotate_half_neg")]; + int32 attention_4_k_rotate_half_concat_axis_0 = const()[name = string("attention_4_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_4_k_rotate_half_concat_interleave_0 = const()[name = string("attention_4_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_4_k_rotate_half_concat = concat(axis = attention_4_k_rotate_half_concat_axis_0, interleave = attention_4_k_rotate_half_concat_interleave_0, values = (attention_4_k_rotate_half_neg, attention_4_k_rotate_half_split_0))[name = string("attention_4_k_rotate_half_concat")]; + tensor attention_4_k_rope_rhs_mult = mul(x = attention_4_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_4_k_rope_rhs_mult")]; + tensor attention_4_k_rope = add(x = attention_4_k_rope_lhs_mult, y = attention_4_k_rope_rhs_mult)[name = string("attention_4_k_rope")]; + int32 attention_4_q_splits_axis_0 = const()[name = string("attention_4_q_splits_axis_0"), val = int32(1)]; + int32 attention_4_q_splits_num_splits_0 = const()[name = string("attention_4_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_4_q_splits_0, tensor attention_4_q_splits_1 = split(axis = attention_4_q_splits_axis_0, num_splits = attention_4_q_splits_num_splits_0, x = attention_4_q_rope)[name = string("attention_4_q_splits")]; + tensor attention_4_update_begin_0_values0_0 = const()[name = string("attention_4_update_begin_0_values0_0"), val = tensor([4])]; + tensor attention_4_update_begin_0_values1_0 = const()[name = string("attention_4_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_4_update_begin_0_values3_0 = const()[name = string("attention_4_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_4_update_begin_0_axis_0 = const()[name = string("attention_4_update_begin_0_axis_0"), val = int32(0)]; + bool attention_4_update_begin_0_interleave_0 = const()[name = string("attention_4_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_4_update_begin_0 = concat(axis = attention_4_update_begin_0_axis_0, interleave = attention_4_update_begin_0_interleave_0, values = (attention_4_update_begin_0_values0_0, attention_4_update_begin_0_values1_0, query_pos1, attention_4_update_begin_0_values3_0))[name = string("attention_4_update_begin_0")]; + tensor attention_4_update_end_0_values0_0 = const()[name = string("attention_4_update_end_0_values0_0"), val = tensor([5])]; + tensor attention_4_update_end_0_values1_0 = const()[name = string("attention_4_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_4_update_end_0_values3_0 = const()[name = string("attention_4_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_4_update_end_0_axis_0 = const()[name = string("attention_4_update_end_0_axis_0"), val = int32(0)]; + bool attention_4_update_end_0_interleave_0 = const()[name = string("attention_4_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_4_update_end_0 = concat(axis = attention_4_update_end_0_axis_0, interleave = attention_4_update_end_0_interleave_0, values = (attention_4_update_end_0_values0_0, attention_4_update_end_0_values1_0, end_pos_0, attention_4_update_end_0_values3_0))[name = string("attention_4_update_end_0")]; + tensor attention_4_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_4_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_4_updated_key_cache_0 = slice_update(begin = attention_4_update_begin_0, end = attention_4_update_end_0, squeeze_mask = attention_4_updated_key_cache_0_squeeze_mask_0, update = attention_4_k_rope, x = coreml_update_state_6)[name = string("attention_4_updated_key_cache_0")]; + write_state(data = attention_4_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_56_write_state")]; + tensor coreml_update_state_8 = read_state(input = key_cache_state)[name = string("coreml_update_state_56")]; + tensor attention_4_key_cache_begin_0 = const()[name = string("attention_4_key_cache_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor attention_4_key_cache_end_0 = const()[name = string("attention_4_key_cache_end_0"), val = tensor([5, 2, 512, 64])]; + tensor attention_4_key_cache_squeeze_mask_0 = const()[name = string("attention_4_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_4_key_cache = slice_by_index(begin = attention_4_key_cache_begin_0, end = attention_4_key_cache_end_0, squeeze_mask = attention_4_key_cache_squeeze_mask_0, x = coreml_update_state_8)[name = string("attention_4_key_cache")]; + int32 attention_4_key_cache_head_axis_0 = const()[name = string("attention_4_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_4_key_cache_head_num_splits_0 = const()[name = string("attention_4_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_4_key_cache_head_0, tensor attention_4_key_cache_head_1 = split(axis = attention_4_key_cache_head_axis_0, num_splits = attention_4_key_cache_head_num_splits_0, x = attention_4_key_cache)[name = string("attention_4_key_cache_head")]; + tensor attention_4_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_4_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_4_updated_value_cache_0 = slice_update(begin = attention_4_update_begin_0, end = attention_4_update_end_0, squeeze_mask = attention_4_updated_value_cache_0_squeeze_mask_0, update = attention_4_split_qkv_heads_2, x = coreml_update_state_7)[name = string("attention_4_updated_value_cache_0")]; + write_state(data = attention_4_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_57_write_state")]; + tensor coreml_update_state_9 = read_state(input = value_cache_state)[name = string("coreml_update_state_57")]; + tensor attention_4_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_4_slice_current_layer_value_cache_begin_0"), val = tensor([4, 0, 0, 0])]; + tensor attention_4_slice_current_layer_value_cache_end_0 = const()[name = string("attention_4_slice_current_layer_value_cache_end_0"), val = tensor([5, 2, 512, 64])]; + tensor attention_4_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_4_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_4_slice_current_layer_value_cache = slice_by_index(begin = attention_4_slice_current_layer_value_cache_begin_0, end = attention_4_slice_current_layer_value_cache_end_0, squeeze_mask = attention_4_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_9)[name = string("attention_4_slice_current_layer_value_cache")]; + int32 attention_4_slice_value_cache_heads_axis_0 = const()[name = string("attention_4_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_4_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_4_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_4_slice_value_cache_heads_0, tensor attention_4_slice_value_cache_heads_1 = split(axis = attention_4_slice_value_cache_heads_axis_0, num_splits = attention_4_slice_value_cache_heads_num_splits_0, x = attention_4_slice_current_layer_value_cache)[name = string("attention_4_slice_value_cache_heads")]; + bool attention_4_scores_0_transpose_y_0 = const()[name = string("attention_4_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_4_scores_0_transpose_x_0 = const()[name = string("attention_4_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_4_scores_0 = matmul(transpose_x = attention_4_scores_0_transpose_x_0, transpose_y = attention_4_scores_0_transpose_y_0, x = attention_4_key_cache_head_0, y = attention_4_q_splits_0)[name = string("attention_4_scores_0")]; + fp16 attention_4_scaled_scores_0_y_0 = const()[name = string("attention_4_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_4_scaled_scores_0 = mul(x = attention_4_scores_0, y = attention_4_scaled_scores_0_y_0)[name = string("attention_4_scaled_scores_0")]; + tensor attention_4_masked_scaled_scores_0 = add(x = attention_4_scaled_scores_0, y = transpose_0)[name = string("attention_4_masked_scaled_scores_0")]; + int32 softmax_8_axis_0 = const()[name = string("softmax_8_axis_0"), val = int32(-2)]; + tensor softmax_8 = softmax(axis = softmax_8_axis_0, x = attention_4_masked_scaled_scores_0)[name = string("softmax_8")]; + bool attention_4_attention_0_transpose_x_0 = const()[name = string("attention_4_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_4_attention_0_transpose_y_0 = const()[name = string("attention_4_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_4_attention_0 = matmul(transpose_x = attention_4_attention_0_transpose_x_0, transpose_y = attention_4_attention_0_transpose_y_0, x = softmax_8, y = attention_4_slice_value_cache_heads_0)[name = string("attention_4_attention_0")]; + bool attention_4_scores_1_transpose_y_0 = const()[name = string("attention_4_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_4_scores_1_transpose_x_0 = const()[name = string("attention_4_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_4_scores_1 = matmul(transpose_x = attention_4_scores_1_transpose_x_0, transpose_y = attention_4_scores_1_transpose_y_0, x = attention_4_key_cache_head_1, y = attention_4_q_splits_1)[name = string("attention_4_scores_1")]; + fp16 attention_4_scaled_scores_1_y_0 = const()[name = string("attention_4_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_4_scaled_scores_1 = mul(x = attention_4_scores_1, y = attention_4_scaled_scores_1_y_0)[name = string("attention_4_scaled_scores_1")]; + tensor attention_4_masked_scaled_scores_1 = add(x = attention_4_scaled_scores_1, y = transpose_0)[name = string("attention_4_masked_scaled_scores_1")]; + int32 softmax_9_axis_0 = const()[name = string("softmax_9_axis_0"), val = int32(-2)]; + tensor softmax_9 = softmax(axis = softmax_9_axis_0, x = attention_4_masked_scaled_scores_1)[name = string("softmax_9")]; + bool attention_4_attention_1_transpose_x_0 = const()[name = string("attention_4_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_4_attention_1_transpose_y_0 = const()[name = string("attention_4_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_4_attention_1 = matmul(transpose_x = attention_4_attention_1_transpose_x_0, transpose_y = attention_4_attention_1_transpose_y_0, x = softmax_9, y = attention_4_slice_value_cache_heads_1)[name = string("attention_4_attention_1")]; + int32 attention_4_concat_attention_all_heads_axis_0 = const()[name = string("attention_4_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_4_concat_attention_all_heads_interleave_0 = const()[name = string("attention_4_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_4_concat_attention_all_heads = concat(axis = attention_4_concat_attention_all_heads_axis_0, interleave = attention_4_concat_attention_all_heads_interleave_0, values = (attention_4_attention_0, attention_4_attention_1))[name = string("attention_4_concat_attention_all_heads")]; + tensor attention_4_channels_first_retransposed_perm_0 = const()[name = string("attention_4_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_4_reshaped_shape_0 = const()[name = string("attention_4_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_4_channels_first_retransposed = transpose(perm = attention_4_channels_first_retransposed_perm_0, x = attention_4_concat_attention_all_heads)[name = string("transpose_39")]; + tensor attention_4_reshaped = reshape(shape = attention_4_reshaped_shape_0, x = attention_4_channels_first_retransposed)[name = string("attention_4_reshaped")]; + tensor attention_4_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338427904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339030080))))[name = string("attention_4_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_16 = constexpr_blockwise_shift_scale(data = attention_4_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339058816))))[name = string("constexpr_blockwise_shift_scale_16")]; + tensor attention_4_outproj_strides_0 = const()[name = string("attention_4_outproj_strides_0"), val = tensor([1])]; + string attention_4_outproj_pad_type_0 = const()[name = string("attention_4_outproj_pad_type_0"), val = string("valid")]; + tensor attention_4_outproj_pad_0 = const()[name = string("attention_4_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_4_outproj_dilations_0 = const()[name = string("attention_4_outproj_dilations_0"), val = tensor([1])]; + int32 attention_4_outproj_groups_0 = const()[name = string("attention_4_outproj_groups_0"), val = int32(1)]; + tensor attention_4_outproj = conv(dilations = attention_4_outproj_dilations_0, groups = attention_4_outproj_groups_0, pad = attention_4_outproj_pad_0, pad_type = attention_4_outproj_pad_type_0, strides = attention_4_outproj_strides_0, weight = constexpr_blockwise_shift_scale_16, x = attention_4_reshaped)[name = string("attention_4_outproj")]; + tensor block_4_residual_1 = add(x = block_3_residual_2, y = attention_4_outproj)[name = string("block_4_residual_1")]; + tensor block_4_ffn_rmsnorm_abs = abs(x = block_4_residual_1)[name = string("block_4_ffn_rmsnorm_abs")]; + tensor block_4_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_4_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_4_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_4_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_4_ffn_rmsnorm_maxval = reduce_max(axes = block_4_ffn_rmsnorm_maxval_axes_0, keep_dims = block_4_ffn_rmsnorm_maxval_keep_dims_0, x = block_4_ffn_rmsnorm_abs)[name = string("block_4_ffn_rmsnorm_maxval")]; + fp16 block_4_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_4_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_4_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_4_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_4_ffn_rmsnorm_maxval_clipped = clip(alpha = block_4_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_4_ffn_rmsnorm_maxval_clipped_beta_0, x = block_4_ffn_rmsnorm_maxval)[name = string("block_4_ffn_rmsnorm_maxval_clipped")]; + tensor block_4_ffn_rmsnorm_scaled = real_div(x = block_4_residual_1, y = block_4_ffn_rmsnorm_maxval_clipped)[name = string("block_4_ffn_rmsnorm_scaled")]; + tensor block_4_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_4_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_4_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_4_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_4_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_4_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_4_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_4_ffn_rmsnorm_scaled)[name = string("block_4_ffn_rmsnorm_squared_sum")]; + fp16 block_4_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_4_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_4_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_4_ffn_rmsnorm_rsqrt_epsilon_0, x = block_4_ffn_rmsnorm_squared_sum)[name = string("block_4_ffn_rmsnorm_rsqrt")]; + fp16 block_4_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_4_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_4_ffn_rmsnorm_dim_scaled = mul(x = block_4_ffn_rmsnorm_scaled, y = block_4_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_4_ffn_rmsnorm_dim_scaled")]; + tensor block_4_ffn_rmsnorm_normalized = mul(x = block_4_ffn_rmsnorm_dim_scaled, y = block_4_ffn_rmsnorm_rsqrt)[name = string("block_4_ffn_rmsnorm_normalized")]; + tensor block_4_ffn_rmsnorm_y_0 = const()[name = string("block_4_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339060672)))]; + tensor block_4_ffn_rmsnorm = mul(x = block_4_ffn_rmsnorm_normalized, y = block_4_ffn_rmsnorm_y_0)[name = string("block_4_ffn_rmsnorm")]; + tensor block_4_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339062528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342331200))))[name = string("block_4_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_17 = constexpr_blockwise_shift_scale(data = block_4_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342486912))))[name = string("constexpr_blockwise_shift_scale_17")]; + tensor block_4_ffn_inproj_strides_0 = const()[name = string("block_4_ffn_inproj_strides_0"), val = tensor([1])]; + string block_4_ffn_inproj_pad_type_0 = const()[name = string("block_4_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_4_ffn_inproj_pad_0 = const()[name = string("block_4_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_4_ffn_inproj_dilations_0 = const()[name = string("block_4_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_4_ffn_inproj_groups_0 = const()[name = string("block_4_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_4_ffn_inproj = conv(dilations = block_4_ffn_inproj_dilations_0, groups = block_4_ffn_inproj_groups_0, pad = block_4_ffn_inproj_pad_0, pad_type = block_4_ffn_inproj_pad_type_0, strides = block_4_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_17, x = block_4_ffn_rmsnorm)[name = string("block_4_ffn_inproj")]; + tensor block_4_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342496704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345765376))))[name = string("block_4_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_18 = constexpr_blockwise_shift_scale(data = block_4_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345921088))))[name = string("constexpr_blockwise_shift_scale_18")]; + tensor block_4_ffn_g_strides_0 = const()[name = string("block_4_ffn_g_strides_0"), val = tensor([1])]; + string block_4_ffn_g_pad_type_0 = const()[name = string("block_4_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_4_ffn_g_pad_0 = const()[name = string("block_4_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_4_ffn_g_dilations_0 = const()[name = string("block_4_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_4_ffn_g_groups_0 = const()[name = string("block_4_ffn_g_groups_0"), val = int32(1)]; + tensor block_4_ffn_g = conv(dilations = block_4_ffn_g_dilations_0, groups = block_4_ffn_g_groups_0, pad = block_4_ffn_g_pad_0, pad_type = block_4_ffn_g_pad_type_0, strides = block_4_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_18, x = block_4_ffn_rmsnorm)[name = string("block_4_ffn_g")]; + tensor block_4_ffn_g_activation = silu(x = block_4_ffn_g)[name = string("block_4_ffn_g_activation")]; + tensor block_4_ffn_x_gated = mul(x = block_4_ffn_inproj, y = block_4_ffn_g_activation)[name = string("block_4_ffn_x_gated")]; + tensor block_4_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345930880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349199552))))[name = string("block_4_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_19 = constexpr_blockwise_shift_scale(data = block_4_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349228288))))[name = string("constexpr_blockwise_shift_scale_19")]; + tensor block_4_ffn_outproj_strides_0 = const()[name = string("block_4_ffn_outproj_strides_0"), val = tensor([1])]; + string block_4_ffn_outproj_pad_type_0 = const()[name = string("block_4_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_4_ffn_outproj_pad_0 = const()[name = string("block_4_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_4_ffn_outproj_dilations_0 = const()[name = string("block_4_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_4_ffn_outproj_groups_0 = const()[name = string("block_4_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_4_ffn_outproj = conv(dilations = block_4_ffn_outproj_dilations_0, groups = block_4_ffn_outproj_groups_0, pad = block_4_ffn_outproj_pad_0, pad_type = block_4_ffn_outproj_pad_type_0, strides = block_4_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_19, x = block_4_ffn_x_gated)[name = string("block_4_ffn_outproj")]; + tensor block_4_residual_2 = add(x = block_4_ffn_outproj, y = block_4_residual_1)[name = string("block_4_residual_2")]; + tensor block_5_attention_rmsnorm_abs = abs(x = block_4_residual_2)[name = string("block_5_attention_rmsnorm_abs")]; + tensor block_5_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_5_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_5_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_5_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_5_attention_rmsnorm_maxval = reduce_max(axes = block_5_attention_rmsnorm_maxval_axes_0, keep_dims = block_5_attention_rmsnorm_maxval_keep_dims_0, x = block_5_attention_rmsnorm_abs)[name = string("block_5_attention_rmsnorm_maxval")]; + fp16 block_5_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_5_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_5_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_5_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_5_attention_rmsnorm_maxval_clipped = clip(alpha = block_5_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_5_attention_rmsnorm_maxval_clipped_beta_0, x = block_5_attention_rmsnorm_maxval)[name = string("block_5_attention_rmsnorm_maxval_clipped")]; + tensor block_5_attention_rmsnorm_scaled = real_div(x = block_4_residual_2, y = block_5_attention_rmsnorm_maxval_clipped)[name = string("block_5_attention_rmsnorm_scaled")]; + tensor block_5_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_5_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_5_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_5_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_5_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_5_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_5_attention_rmsnorm_squared_sum_keep_dims_0, x = block_5_attention_rmsnorm_scaled)[name = string("block_5_attention_rmsnorm_squared_sum")]; + fp16 block_5_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_5_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_5_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_5_attention_rmsnorm_rsqrt_epsilon_0, x = block_5_attention_rmsnorm_squared_sum)[name = string("block_5_attention_rmsnorm_rsqrt")]; + fp16 block_5_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_5_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_5_attention_rmsnorm_dim_scaled = mul(x = block_5_attention_rmsnorm_scaled, y = block_5_attention_rmsnorm_dim_scaled_y_0)[name = string("block_5_attention_rmsnorm_dim_scaled")]; + tensor block_5_attention_rmsnorm_normalized = mul(x = block_5_attention_rmsnorm_dim_scaled, y = block_5_attention_rmsnorm_rsqrt)[name = string("block_5_attention_rmsnorm_normalized")]; + tensor block_5_attention_rmsnorm_y_0 = const()[name = string("block_5_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349230144)))]; + tensor block_5_attention_rmsnorm = mul(x = block_5_attention_rmsnorm_normalized, y = block_5_attention_rmsnorm_y_0)[name = string("block_5_attention_rmsnorm")]; + tensor attention_5_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349232000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350006208))))[name = string("attention_5_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_20 = constexpr_blockwise_shift_scale(data = attention_5_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350043136))))[name = string("constexpr_blockwise_shift_scale_20")]; + tensor attention_5_qkvproj_bias_0 = const()[name = string("attention_5_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350045504)))]; + tensor attention_5_qkvproj_strides_0 = const()[name = string("attention_5_qkvproj_strides_0"), val = tensor([1])]; + string attention_5_qkvproj_pad_type_0 = const()[name = string("attention_5_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_5_qkvproj_pad_0 = const()[name = string("attention_5_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_5_qkvproj_dilations_0 = const()[name = string("attention_5_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_5_qkvproj_groups_0 = const()[name = string("attention_5_qkvproj_groups_0"), val = int32(1)]; + tensor attention_5_qkvproj = conv(bias = attention_5_qkvproj_bias_0, dilations = attention_5_qkvproj_dilations_0, groups = attention_5_qkvproj_groups_0, pad = attention_5_qkvproj_pad_0, pad_type = attention_5_qkvproj_pad_type_0, strides = attention_5_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_20, x = block_5_attention_rmsnorm)[name = string("attention_5_qkvproj")]; + tensor attention_5_head_reshape_shape_0 = const()[name = string("attention_5_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_5_head_reshape = reshape(shape = attention_5_head_reshape_shape_0, x = attention_5_qkvproj)[name = string("attention_5_head_reshape")]; + tensor attention_5_head_transpose_perm_0 = const()[name = string("attention_5_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_5_split_qkv_heads_axis_0 = const()[name = string("attention_5_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_5_split_qkv_heads_split_sizes_0 = const()[name = string("attention_5_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_5_head_transpose = transpose(perm = attention_5_head_transpose_perm_0, x = attention_5_head_reshape)[name = string("transpose_38")]; + tensor attention_5_split_qkv_heads_0, tensor attention_5_split_qkv_heads_1, tensor attention_5_split_qkv_heads_2 = split(axis = attention_5_split_qkv_heads_axis_0, split_sizes = attention_5_split_qkv_heads_split_sizes_0, x = attention_5_head_transpose)[name = string("attention_5_split_qkv_heads")]; + tensor attention_5_q_rope_lhs_mult = mul(x = attention_5_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_5_q_rope_lhs_mult")]; + int32 attention_5_q_rotate_half_split_num_splits_0 = const()[name = string("attention_5_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_5_q_rotate_half_split_axis_0 = const()[name = string("attention_5_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_5_q_rotate_half_split_0, tensor attention_5_q_rotate_half_split_1 = split(axis = attention_5_q_rotate_half_split_axis_0, num_splits = attention_5_q_rotate_half_split_num_splits_0, x = attention_5_split_qkv_heads_0)[name = string("attention_5_q_rotate_half_split")]; + fp16 attention_5_q_rotate_half_neg_y_0 = const()[name = string("attention_5_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_5_q_rotate_half_neg = mul(x = attention_5_q_rotate_half_split_1, y = attention_5_q_rotate_half_neg_y_0)[name = string("attention_5_q_rotate_half_neg")]; + int32 attention_5_q_rotate_half_concat_axis_0 = const()[name = string("attention_5_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_5_q_rotate_half_concat_interleave_0 = const()[name = string("attention_5_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_5_q_rotate_half_concat = concat(axis = attention_5_q_rotate_half_concat_axis_0, interleave = attention_5_q_rotate_half_concat_interleave_0, values = (attention_5_q_rotate_half_neg, attention_5_q_rotate_half_split_0))[name = string("attention_5_q_rotate_half_concat")]; + tensor attention_5_q_rope_rhs_mult = mul(x = attention_5_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_5_q_rope_rhs_mult")]; + tensor attention_5_q_rope = add(x = attention_5_q_rope_lhs_mult, y = attention_5_q_rope_rhs_mult)[name = string("attention_5_q_rope")]; + tensor attention_5_k_rope_lhs_mult = mul(x = attention_5_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_5_k_rope_lhs_mult")]; + int32 attention_5_k_rotate_half_split_num_splits_0 = const()[name = string("attention_5_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_5_k_rotate_half_split_axis_0 = const()[name = string("attention_5_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_5_k_rotate_half_split_0, tensor attention_5_k_rotate_half_split_1 = split(axis = attention_5_k_rotate_half_split_axis_0, num_splits = attention_5_k_rotate_half_split_num_splits_0, x = attention_5_split_qkv_heads_1)[name = string("attention_5_k_rotate_half_split")]; + fp16 attention_5_k_rotate_half_neg_y_0 = const()[name = string("attention_5_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_5_k_rotate_half_neg = mul(x = attention_5_k_rotate_half_split_1, y = attention_5_k_rotate_half_neg_y_0)[name = string("attention_5_k_rotate_half_neg")]; + int32 attention_5_k_rotate_half_concat_axis_0 = const()[name = string("attention_5_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_5_k_rotate_half_concat_interleave_0 = const()[name = string("attention_5_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_5_k_rotate_half_concat = concat(axis = attention_5_k_rotate_half_concat_axis_0, interleave = attention_5_k_rotate_half_concat_interleave_0, values = (attention_5_k_rotate_half_neg, attention_5_k_rotate_half_split_0))[name = string("attention_5_k_rotate_half_concat")]; + tensor attention_5_k_rope_rhs_mult = mul(x = attention_5_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_5_k_rope_rhs_mult")]; + tensor attention_5_k_rope = add(x = attention_5_k_rope_lhs_mult, y = attention_5_k_rope_rhs_mult)[name = string("attention_5_k_rope")]; + int32 attention_5_q_splits_axis_0 = const()[name = string("attention_5_q_splits_axis_0"), val = int32(1)]; + int32 attention_5_q_splits_num_splits_0 = const()[name = string("attention_5_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_5_q_splits_0, tensor attention_5_q_splits_1 = split(axis = attention_5_q_splits_axis_0, num_splits = attention_5_q_splits_num_splits_0, x = attention_5_q_rope)[name = string("attention_5_q_splits")]; + tensor attention_5_update_begin_0_values0_0 = const()[name = string("attention_5_update_begin_0_values0_0"), val = tensor([5])]; + tensor attention_5_update_begin_0_values1_0 = const()[name = string("attention_5_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_5_update_begin_0_values3_0 = const()[name = string("attention_5_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_5_update_begin_0_axis_0 = const()[name = string("attention_5_update_begin_0_axis_0"), val = int32(0)]; + bool attention_5_update_begin_0_interleave_0 = const()[name = string("attention_5_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_5_update_begin_0 = concat(axis = attention_5_update_begin_0_axis_0, interleave = attention_5_update_begin_0_interleave_0, values = (attention_5_update_begin_0_values0_0, attention_5_update_begin_0_values1_0, query_pos1, attention_5_update_begin_0_values3_0))[name = string("attention_5_update_begin_0")]; + tensor attention_5_update_end_0_values0_0 = const()[name = string("attention_5_update_end_0_values0_0"), val = tensor([6])]; + tensor attention_5_update_end_0_values1_0 = const()[name = string("attention_5_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_5_update_end_0_values3_0 = const()[name = string("attention_5_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_5_update_end_0_axis_0 = const()[name = string("attention_5_update_end_0_axis_0"), val = int32(0)]; + bool attention_5_update_end_0_interleave_0 = const()[name = string("attention_5_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_5_update_end_0 = concat(axis = attention_5_update_end_0_axis_0, interleave = attention_5_update_end_0_interleave_0, values = (attention_5_update_end_0_values0_0, attention_5_update_end_0_values1_0, end_pos_0, attention_5_update_end_0_values3_0))[name = string("attention_5_update_end_0")]; + tensor attention_5_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_5_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_5_updated_key_cache_0 = slice_update(begin = attention_5_update_begin_0, end = attention_5_update_end_0, squeeze_mask = attention_5_updated_key_cache_0_squeeze_mask_0, update = attention_5_k_rope, x = coreml_update_state_8)[name = string("attention_5_updated_key_cache_0")]; + write_state(data = attention_5_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_58_write_state")]; + tensor coreml_update_state_10 = read_state(input = key_cache_state)[name = string("coreml_update_state_58")]; + tensor attention_5_key_cache_begin_0 = const()[name = string("attention_5_key_cache_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor attention_5_key_cache_end_0 = const()[name = string("attention_5_key_cache_end_0"), val = tensor([6, 2, 512, 64])]; + tensor attention_5_key_cache_squeeze_mask_0 = const()[name = string("attention_5_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_5_key_cache = slice_by_index(begin = attention_5_key_cache_begin_0, end = attention_5_key_cache_end_0, squeeze_mask = attention_5_key_cache_squeeze_mask_0, x = coreml_update_state_10)[name = string("attention_5_key_cache")]; + int32 attention_5_key_cache_head_axis_0 = const()[name = string("attention_5_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_5_key_cache_head_num_splits_0 = const()[name = string("attention_5_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_5_key_cache_head_0, tensor attention_5_key_cache_head_1 = split(axis = attention_5_key_cache_head_axis_0, num_splits = attention_5_key_cache_head_num_splits_0, x = attention_5_key_cache)[name = string("attention_5_key_cache_head")]; + tensor attention_5_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_5_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_5_updated_value_cache_0 = slice_update(begin = attention_5_update_begin_0, end = attention_5_update_end_0, squeeze_mask = attention_5_updated_value_cache_0_squeeze_mask_0, update = attention_5_split_qkv_heads_2, x = coreml_update_state_9)[name = string("attention_5_updated_value_cache_0")]; + write_state(data = attention_5_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_59_write_state")]; + tensor coreml_update_state_11 = read_state(input = value_cache_state)[name = string("coreml_update_state_59")]; + tensor attention_5_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_5_slice_current_layer_value_cache_begin_0"), val = tensor([5, 0, 0, 0])]; + tensor attention_5_slice_current_layer_value_cache_end_0 = const()[name = string("attention_5_slice_current_layer_value_cache_end_0"), val = tensor([6, 2, 512, 64])]; + tensor attention_5_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_5_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_5_slice_current_layer_value_cache = slice_by_index(begin = attention_5_slice_current_layer_value_cache_begin_0, end = attention_5_slice_current_layer_value_cache_end_0, squeeze_mask = attention_5_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_11)[name = string("attention_5_slice_current_layer_value_cache")]; + int32 attention_5_slice_value_cache_heads_axis_0 = const()[name = string("attention_5_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_5_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_5_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_5_slice_value_cache_heads_0, tensor attention_5_slice_value_cache_heads_1 = split(axis = attention_5_slice_value_cache_heads_axis_0, num_splits = attention_5_slice_value_cache_heads_num_splits_0, x = attention_5_slice_current_layer_value_cache)[name = string("attention_5_slice_value_cache_heads")]; + bool attention_5_scores_0_transpose_y_0 = const()[name = string("attention_5_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_5_scores_0_transpose_x_0 = const()[name = string("attention_5_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_5_scores_0 = matmul(transpose_x = attention_5_scores_0_transpose_x_0, transpose_y = attention_5_scores_0_transpose_y_0, x = attention_5_key_cache_head_0, y = attention_5_q_splits_0)[name = string("attention_5_scores_0")]; + fp16 attention_5_scaled_scores_0_y_0 = const()[name = string("attention_5_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_5_scaled_scores_0 = mul(x = attention_5_scores_0, y = attention_5_scaled_scores_0_y_0)[name = string("attention_5_scaled_scores_0")]; + tensor attention_5_masked_scaled_scores_0 = add(x = attention_5_scaled_scores_0, y = transpose_0)[name = string("attention_5_masked_scaled_scores_0")]; + int32 softmax_10_axis_0 = const()[name = string("softmax_10_axis_0"), val = int32(-2)]; + tensor softmax_10 = softmax(axis = softmax_10_axis_0, x = attention_5_masked_scaled_scores_0)[name = string("softmax_10")]; + bool attention_5_attention_0_transpose_x_0 = const()[name = string("attention_5_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_5_attention_0_transpose_y_0 = const()[name = string("attention_5_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_5_attention_0 = matmul(transpose_x = attention_5_attention_0_transpose_x_0, transpose_y = attention_5_attention_0_transpose_y_0, x = softmax_10, y = attention_5_slice_value_cache_heads_0)[name = string("attention_5_attention_0")]; + bool attention_5_scores_1_transpose_y_0 = const()[name = string("attention_5_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_5_scores_1_transpose_x_0 = const()[name = string("attention_5_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_5_scores_1 = matmul(transpose_x = attention_5_scores_1_transpose_x_0, transpose_y = attention_5_scores_1_transpose_y_0, x = attention_5_key_cache_head_1, y = attention_5_q_splits_1)[name = string("attention_5_scores_1")]; + fp16 attention_5_scaled_scores_1_y_0 = const()[name = string("attention_5_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_5_scaled_scores_1 = mul(x = attention_5_scores_1, y = attention_5_scaled_scores_1_y_0)[name = string("attention_5_scaled_scores_1")]; + tensor attention_5_masked_scaled_scores_1 = add(x = attention_5_scaled_scores_1, y = transpose_0)[name = string("attention_5_masked_scaled_scores_1")]; + int32 softmax_11_axis_0 = const()[name = string("softmax_11_axis_0"), val = int32(-2)]; + tensor softmax_11 = softmax(axis = softmax_11_axis_0, x = attention_5_masked_scaled_scores_1)[name = string("softmax_11")]; + bool attention_5_attention_1_transpose_x_0 = const()[name = string("attention_5_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_5_attention_1_transpose_y_0 = const()[name = string("attention_5_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_5_attention_1 = matmul(transpose_x = attention_5_attention_1_transpose_x_0, transpose_y = attention_5_attention_1_transpose_y_0, x = softmax_11, y = attention_5_slice_value_cache_heads_1)[name = string("attention_5_attention_1")]; + int32 attention_5_concat_attention_all_heads_axis_0 = const()[name = string("attention_5_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_5_concat_attention_all_heads_interleave_0 = const()[name = string("attention_5_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_5_concat_attention_all_heads = concat(axis = attention_5_concat_attention_all_heads_axis_0, interleave = attention_5_concat_attention_all_heads_interleave_0, values = (attention_5_attention_0, attention_5_attention_1))[name = string("attention_5_concat_attention_all_heads")]; + tensor attention_5_channels_first_retransposed_perm_0 = const()[name = string("attention_5_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_5_reshaped_shape_0 = const()[name = string("attention_5_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_5_channels_first_retransposed = transpose(perm = attention_5_channels_first_retransposed_perm_0, x = attention_5_concat_attention_all_heads)[name = string("transpose_37")]; + tensor attention_5_reshaped = reshape(shape = attention_5_reshaped_shape_0, x = attention_5_channels_first_retransposed)[name = string("attention_5_reshaped")]; + tensor attention_5_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350047872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350650048))))[name = string("attention_5_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_21 = constexpr_blockwise_shift_scale(data = attention_5_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350678784))))[name = string("constexpr_blockwise_shift_scale_21")]; + tensor attention_5_outproj_strides_0 = const()[name = string("attention_5_outproj_strides_0"), val = tensor([1])]; + string attention_5_outproj_pad_type_0 = const()[name = string("attention_5_outproj_pad_type_0"), val = string("valid")]; + tensor attention_5_outproj_pad_0 = const()[name = string("attention_5_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_5_outproj_dilations_0 = const()[name = string("attention_5_outproj_dilations_0"), val = tensor([1])]; + int32 attention_5_outproj_groups_0 = const()[name = string("attention_5_outproj_groups_0"), val = int32(1)]; + tensor attention_5_outproj = conv(dilations = attention_5_outproj_dilations_0, groups = attention_5_outproj_groups_0, pad = attention_5_outproj_pad_0, pad_type = attention_5_outproj_pad_type_0, strides = attention_5_outproj_strides_0, weight = constexpr_blockwise_shift_scale_21, x = attention_5_reshaped)[name = string("attention_5_outproj")]; + tensor block_5_residual_1 = add(x = block_4_residual_2, y = attention_5_outproj)[name = string("block_5_residual_1")]; + tensor block_5_ffn_rmsnorm_abs = abs(x = block_5_residual_1)[name = string("block_5_ffn_rmsnorm_abs")]; + tensor block_5_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_5_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_5_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_5_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_5_ffn_rmsnorm_maxval = reduce_max(axes = block_5_ffn_rmsnorm_maxval_axes_0, keep_dims = block_5_ffn_rmsnorm_maxval_keep_dims_0, x = block_5_ffn_rmsnorm_abs)[name = string("block_5_ffn_rmsnorm_maxval")]; + fp16 block_5_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_5_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_5_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_5_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_5_ffn_rmsnorm_maxval_clipped = clip(alpha = block_5_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_5_ffn_rmsnorm_maxval_clipped_beta_0, x = block_5_ffn_rmsnorm_maxval)[name = string("block_5_ffn_rmsnorm_maxval_clipped")]; + tensor block_5_ffn_rmsnorm_scaled = real_div(x = block_5_residual_1, y = block_5_ffn_rmsnorm_maxval_clipped)[name = string("block_5_ffn_rmsnorm_scaled")]; + tensor block_5_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_5_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_5_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_5_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_5_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_5_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_5_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_5_ffn_rmsnorm_scaled)[name = string("block_5_ffn_rmsnorm_squared_sum")]; + fp16 block_5_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_5_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_5_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_5_ffn_rmsnorm_rsqrt_epsilon_0, x = block_5_ffn_rmsnorm_squared_sum)[name = string("block_5_ffn_rmsnorm_rsqrt")]; + fp16 block_5_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_5_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_5_ffn_rmsnorm_dim_scaled = mul(x = block_5_ffn_rmsnorm_scaled, y = block_5_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_5_ffn_rmsnorm_dim_scaled")]; + tensor block_5_ffn_rmsnorm_normalized = mul(x = block_5_ffn_rmsnorm_dim_scaled, y = block_5_ffn_rmsnorm_rsqrt)[name = string("block_5_ffn_rmsnorm_normalized")]; + tensor block_5_ffn_rmsnorm_y_0 = const()[name = string("block_5_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350680640)))]; + tensor block_5_ffn_rmsnorm = mul(x = block_5_ffn_rmsnorm_normalized, y = block_5_ffn_rmsnorm_y_0)[name = string("block_5_ffn_rmsnorm")]; + tensor block_5_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350682496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353951168))))[name = string("block_5_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_22 = constexpr_blockwise_shift_scale(data = block_5_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354106880))))[name = string("constexpr_blockwise_shift_scale_22")]; + tensor block_5_ffn_inproj_strides_0 = const()[name = string("block_5_ffn_inproj_strides_0"), val = tensor([1])]; + string block_5_ffn_inproj_pad_type_0 = const()[name = string("block_5_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_5_ffn_inproj_pad_0 = const()[name = string("block_5_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_5_ffn_inproj_dilations_0 = const()[name = string("block_5_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_5_ffn_inproj_groups_0 = const()[name = string("block_5_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_5_ffn_inproj = conv(dilations = block_5_ffn_inproj_dilations_0, groups = block_5_ffn_inproj_groups_0, pad = block_5_ffn_inproj_pad_0, pad_type = block_5_ffn_inproj_pad_type_0, strides = block_5_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_22, x = block_5_ffn_rmsnorm)[name = string("block_5_ffn_inproj")]; + tensor block_5_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354116672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357385344))))[name = string("block_5_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_23 = constexpr_blockwise_shift_scale(data = block_5_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357541056))))[name = string("constexpr_blockwise_shift_scale_23")]; + tensor block_5_ffn_g_strides_0 = const()[name = string("block_5_ffn_g_strides_0"), val = tensor([1])]; + string block_5_ffn_g_pad_type_0 = const()[name = string("block_5_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_5_ffn_g_pad_0 = const()[name = string("block_5_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_5_ffn_g_dilations_0 = const()[name = string("block_5_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_5_ffn_g_groups_0 = const()[name = string("block_5_ffn_g_groups_0"), val = int32(1)]; + tensor block_5_ffn_g = conv(dilations = block_5_ffn_g_dilations_0, groups = block_5_ffn_g_groups_0, pad = block_5_ffn_g_pad_0, pad_type = block_5_ffn_g_pad_type_0, strides = block_5_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_23, x = block_5_ffn_rmsnorm)[name = string("block_5_ffn_g")]; + tensor block_5_ffn_g_activation = silu(x = block_5_ffn_g)[name = string("block_5_ffn_g_activation")]; + tensor block_5_ffn_x_gated = mul(x = block_5_ffn_inproj, y = block_5_ffn_g_activation)[name = string("block_5_ffn_x_gated")]; + tensor block_5_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357550848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360819520))))[name = string("block_5_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_24 = constexpr_blockwise_shift_scale(data = block_5_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360848256))))[name = string("constexpr_blockwise_shift_scale_24")]; + tensor block_5_ffn_outproj_strides_0 = const()[name = string("block_5_ffn_outproj_strides_0"), val = tensor([1])]; + string block_5_ffn_outproj_pad_type_0 = const()[name = string("block_5_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_5_ffn_outproj_pad_0 = const()[name = string("block_5_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_5_ffn_outproj_dilations_0 = const()[name = string("block_5_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_5_ffn_outproj_groups_0 = const()[name = string("block_5_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_5_ffn_outproj = conv(dilations = block_5_ffn_outproj_dilations_0, groups = block_5_ffn_outproj_groups_0, pad = block_5_ffn_outproj_pad_0, pad_type = block_5_ffn_outproj_pad_type_0, strides = block_5_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_24, x = block_5_ffn_x_gated)[name = string("block_5_ffn_outproj")]; + tensor block_5_residual_2 = add(x = block_5_ffn_outproj, y = block_5_residual_1)[name = string("block_5_residual_2")]; + tensor block_6_attention_rmsnorm_abs = abs(x = block_5_residual_2)[name = string("block_6_attention_rmsnorm_abs")]; + tensor block_6_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_6_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_6_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_6_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_6_attention_rmsnorm_maxval = reduce_max(axes = block_6_attention_rmsnorm_maxval_axes_0, keep_dims = block_6_attention_rmsnorm_maxval_keep_dims_0, x = block_6_attention_rmsnorm_abs)[name = string("block_6_attention_rmsnorm_maxval")]; + fp16 block_6_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_6_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_6_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_6_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_6_attention_rmsnorm_maxval_clipped = clip(alpha = block_6_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_6_attention_rmsnorm_maxval_clipped_beta_0, x = block_6_attention_rmsnorm_maxval)[name = string("block_6_attention_rmsnorm_maxval_clipped")]; + tensor block_6_attention_rmsnorm_scaled = real_div(x = block_5_residual_2, y = block_6_attention_rmsnorm_maxval_clipped)[name = string("block_6_attention_rmsnorm_scaled")]; + tensor block_6_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_6_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_6_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_6_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_6_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_6_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_6_attention_rmsnorm_squared_sum_keep_dims_0, x = block_6_attention_rmsnorm_scaled)[name = string("block_6_attention_rmsnorm_squared_sum")]; + fp16 block_6_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_6_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_6_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_6_attention_rmsnorm_rsqrt_epsilon_0, x = block_6_attention_rmsnorm_squared_sum)[name = string("block_6_attention_rmsnorm_rsqrt")]; + fp16 block_6_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_6_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_6_attention_rmsnorm_dim_scaled = mul(x = block_6_attention_rmsnorm_scaled, y = block_6_attention_rmsnorm_dim_scaled_y_0)[name = string("block_6_attention_rmsnorm_dim_scaled")]; + tensor block_6_attention_rmsnorm_normalized = mul(x = block_6_attention_rmsnorm_dim_scaled, y = block_6_attention_rmsnorm_rsqrt)[name = string("block_6_attention_rmsnorm_normalized")]; + tensor block_6_attention_rmsnorm_y_0 = const()[name = string("block_6_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360850112)))]; + tensor block_6_attention_rmsnorm = mul(x = block_6_attention_rmsnorm_normalized, y = block_6_attention_rmsnorm_y_0)[name = string("block_6_attention_rmsnorm")]; + tensor attention_6_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360851968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361626176))))[name = string("attention_6_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_25 = constexpr_blockwise_shift_scale(data = attention_6_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361663104))))[name = string("constexpr_blockwise_shift_scale_25")]; + tensor attention_6_qkvproj_bias_0 = const()[name = string("attention_6_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361665472)))]; + tensor attention_6_qkvproj_strides_0 = const()[name = string("attention_6_qkvproj_strides_0"), val = tensor([1])]; + string attention_6_qkvproj_pad_type_0 = const()[name = string("attention_6_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_6_qkvproj_pad_0 = const()[name = string("attention_6_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_6_qkvproj_dilations_0 = const()[name = string("attention_6_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_6_qkvproj_groups_0 = const()[name = string("attention_6_qkvproj_groups_0"), val = int32(1)]; + tensor attention_6_qkvproj = conv(bias = attention_6_qkvproj_bias_0, dilations = attention_6_qkvproj_dilations_0, groups = attention_6_qkvproj_groups_0, pad = attention_6_qkvproj_pad_0, pad_type = attention_6_qkvproj_pad_type_0, strides = attention_6_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_25, x = block_6_attention_rmsnorm)[name = string("attention_6_qkvproj")]; + tensor attention_6_head_reshape_shape_0 = const()[name = string("attention_6_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_6_head_reshape = reshape(shape = attention_6_head_reshape_shape_0, x = attention_6_qkvproj)[name = string("attention_6_head_reshape")]; + tensor attention_6_head_transpose_perm_0 = const()[name = string("attention_6_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_6_split_qkv_heads_axis_0 = const()[name = string("attention_6_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_6_split_qkv_heads_split_sizes_0 = const()[name = string("attention_6_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_6_head_transpose = transpose(perm = attention_6_head_transpose_perm_0, x = attention_6_head_reshape)[name = string("transpose_36")]; + tensor attention_6_split_qkv_heads_0, tensor attention_6_split_qkv_heads_1, tensor attention_6_split_qkv_heads_2 = split(axis = attention_6_split_qkv_heads_axis_0, split_sizes = attention_6_split_qkv_heads_split_sizes_0, x = attention_6_head_transpose)[name = string("attention_6_split_qkv_heads")]; + tensor attention_6_q_rope_lhs_mult = mul(x = attention_6_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_6_q_rope_lhs_mult")]; + int32 attention_6_q_rotate_half_split_num_splits_0 = const()[name = string("attention_6_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_6_q_rotate_half_split_axis_0 = const()[name = string("attention_6_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_6_q_rotate_half_split_0, tensor attention_6_q_rotate_half_split_1 = split(axis = attention_6_q_rotate_half_split_axis_0, num_splits = attention_6_q_rotate_half_split_num_splits_0, x = attention_6_split_qkv_heads_0)[name = string("attention_6_q_rotate_half_split")]; + fp16 attention_6_q_rotate_half_neg_y_0 = const()[name = string("attention_6_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_6_q_rotate_half_neg = mul(x = attention_6_q_rotate_half_split_1, y = attention_6_q_rotate_half_neg_y_0)[name = string("attention_6_q_rotate_half_neg")]; + int32 attention_6_q_rotate_half_concat_axis_0 = const()[name = string("attention_6_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_6_q_rotate_half_concat_interleave_0 = const()[name = string("attention_6_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_6_q_rotate_half_concat = concat(axis = attention_6_q_rotate_half_concat_axis_0, interleave = attention_6_q_rotate_half_concat_interleave_0, values = (attention_6_q_rotate_half_neg, attention_6_q_rotate_half_split_0))[name = string("attention_6_q_rotate_half_concat")]; + tensor attention_6_q_rope_rhs_mult = mul(x = attention_6_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_6_q_rope_rhs_mult")]; + tensor attention_6_q_rope = add(x = attention_6_q_rope_lhs_mult, y = attention_6_q_rope_rhs_mult)[name = string("attention_6_q_rope")]; + tensor attention_6_k_rope_lhs_mult = mul(x = attention_6_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_6_k_rope_lhs_mult")]; + int32 attention_6_k_rotate_half_split_num_splits_0 = const()[name = string("attention_6_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_6_k_rotate_half_split_axis_0 = const()[name = string("attention_6_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_6_k_rotate_half_split_0, tensor attention_6_k_rotate_half_split_1 = split(axis = attention_6_k_rotate_half_split_axis_0, num_splits = attention_6_k_rotate_half_split_num_splits_0, x = attention_6_split_qkv_heads_1)[name = string("attention_6_k_rotate_half_split")]; + fp16 attention_6_k_rotate_half_neg_y_0 = const()[name = string("attention_6_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_6_k_rotate_half_neg = mul(x = attention_6_k_rotate_half_split_1, y = attention_6_k_rotate_half_neg_y_0)[name = string("attention_6_k_rotate_half_neg")]; + int32 attention_6_k_rotate_half_concat_axis_0 = const()[name = string("attention_6_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_6_k_rotate_half_concat_interleave_0 = const()[name = string("attention_6_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_6_k_rotate_half_concat = concat(axis = attention_6_k_rotate_half_concat_axis_0, interleave = attention_6_k_rotate_half_concat_interleave_0, values = (attention_6_k_rotate_half_neg, attention_6_k_rotate_half_split_0))[name = string("attention_6_k_rotate_half_concat")]; + tensor attention_6_k_rope_rhs_mult = mul(x = attention_6_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_6_k_rope_rhs_mult")]; + tensor attention_6_k_rope = add(x = attention_6_k_rope_lhs_mult, y = attention_6_k_rope_rhs_mult)[name = string("attention_6_k_rope")]; + int32 attention_6_q_splits_axis_0 = const()[name = string("attention_6_q_splits_axis_0"), val = int32(1)]; + int32 attention_6_q_splits_num_splits_0 = const()[name = string("attention_6_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_6_q_splits_0, tensor attention_6_q_splits_1 = split(axis = attention_6_q_splits_axis_0, num_splits = attention_6_q_splits_num_splits_0, x = attention_6_q_rope)[name = string("attention_6_q_splits")]; + tensor attention_6_update_begin_0_values0_0 = const()[name = string("attention_6_update_begin_0_values0_0"), val = tensor([6])]; + tensor attention_6_update_begin_0_values1_0 = const()[name = string("attention_6_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_6_update_begin_0_values3_0 = const()[name = string("attention_6_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_6_update_begin_0_axis_0 = const()[name = string("attention_6_update_begin_0_axis_0"), val = int32(0)]; + bool attention_6_update_begin_0_interleave_0 = const()[name = string("attention_6_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_6_update_begin_0 = concat(axis = attention_6_update_begin_0_axis_0, interleave = attention_6_update_begin_0_interleave_0, values = (attention_6_update_begin_0_values0_0, attention_6_update_begin_0_values1_0, query_pos1, attention_6_update_begin_0_values3_0))[name = string("attention_6_update_begin_0")]; + tensor attention_6_update_end_0_values0_0 = const()[name = string("attention_6_update_end_0_values0_0"), val = tensor([7])]; + tensor attention_6_update_end_0_values1_0 = const()[name = string("attention_6_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_6_update_end_0_values3_0 = const()[name = string("attention_6_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_6_update_end_0_axis_0 = const()[name = string("attention_6_update_end_0_axis_0"), val = int32(0)]; + bool attention_6_update_end_0_interleave_0 = const()[name = string("attention_6_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_6_update_end_0 = concat(axis = attention_6_update_end_0_axis_0, interleave = attention_6_update_end_0_interleave_0, values = (attention_6_update_end_0_values0_0, attention_6_update_end_0_values1_0, end_pos_0, attention_6_update_end_0_values3_0))[name = string("attention_6_update_end_0")]; + tensor attention_6_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_6_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_6_updated_key_cache_0 = slice_update(begin = attention_6_update_begin_0, end = attention_6_update_end_0, squeeze_mask = attention_6_updated_key_cache_0_squeeze_mask_0, update = attention_6_k_rope, x = coreml_update_state_10)[name = string("attention_6_updated_key_cache_0")]; + write_state(data = attention_6_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_60_write_state")]; + tensor coreml_update_state_12 = read_state(input = key_cache_state)[name = string("coreml_update_state_60")]; + tensor attention_6_key_cache_begin_0 = const()[name = string("attention_6_key_cache_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor attention_6_key_cache_end_0 = const()[name = string("attention_6_key_cache_end_0"), val = tensor([7, 2, 512, 64])]; + tensor attention_6_key_cache_squeeze_mask_0 = const()[name = string("attention_6_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_6_key_cache = slice_by_index(begin = attention_6_key_cache_begin_0, end = attention_6_key_cache_end_0, squeeze_mask = attention_6_key_cache_squeeze_mask_0, x = coreml_update_state_12)[name = string("attention_6_key_cache")]; + int32 attention_6_key_cache_head_axis_0 = const()[name = string("attention_6_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_6_key_cache_head_num_splits_0 = const()[name = string("attention_6_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_6_key_cache_head_0, tensor attention_6_key_cache_head_1 = split(axis = attention_6_key_cache_head_axis_0, num_splits = attention_6_key_cache_head_num_splits_0, x = attention_6_key_cache)[name = string("attention_6_key_cache_head")]; + tensor attention_6_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_6_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_6_updated_value_cache_0 = slice_update(begin = attention_6_update_begin_0, end = attention_6_update_end_0, squeeze_mask = attention_6_updated_value_cache_0_squeeze_mask_0, update = attention_6_split_qkv_heads_2, x = coreml_update_state_11)[name = string("attention_6_updated_value_cache_0")]; + write_state(data = attention_6_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_61_write_state")]; + tensor coreml_update_state_13 = read_state(input = value_cache_state)[name = string("coreml_update_state_61")]; + tensor attention_6_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_6_slice_current_layer_value_cache_begin_0"), val = tensor([6, 0, 0, 0])]; + tensor attention_6_slice_current_layer_value_cache_end_0 = const()[name = string("attention_6_slice_current_layer_value_cache_end_0"), val = tensor([7, 2, 512, 64])]; + tensor attention_6_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_6_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_6_slice_current_layer_value_cache = slice_by_index(begin = attention_6_slice_current_layer_value_cache_begin_0, end = attention_6_slice_current_layer_value_cache_end_0, squeeze_mask = attention_6_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_13)[name = string("attention_6_slice_current_layer_value_cache")]; + int32 attention_6_slice_value_cache_heads_axis_0 = const()[name = string("attention_6_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_6_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_6_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_6_slice_value_cache_heads_0, tensor attention_6_slice_value_cache_heads_1 = split(axis = attention_6_slice_value_cache_heads_axis_0, num_splits = attention_6_slice_value_cache_heads_num_splits_0, x = attention_6_slice_current_layer_value_cache)[name = string("attention_6_slice_value_cache_heads")]; + bool attention_6_scores_0_transpose_y_0 = const()[name = string("attention_6_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_6_scores_0_transpose_x_0 = const()[name = string("attention_6_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_6_scores_0 = matmul(transpose_x = attention_6_scores_0_transpose_x_0, transpose_y = attention_6_scores_0_transpose_y_0, x = attention_6_key_cache_head_0, y = attention_6_q_splits_0)[name = string("attention_6_scores_0")]; + fp16 attention_6_scaled_scores_0_y_0 = const()[name = string("attention_6_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_6_scaled_scores_0 = mul(x = attention_6_scores_0, y = attention_6_scaled_scores_0_y_0)[name = string("attention_6_scaled_scores_0")]; + tensor attention_6_masked_scaled_scores_0 = add(x = attention_6_scaled_scores_0, y = transpose_0)[name = string("attention_6_masked_scaled_scores_0")]; + int32 softmax_12_axis_0 = const()[name = string("softmax_12_axis_0"), val = int32(-2)]; + tensor softmax_12 = softmax(axis = softmax_12_axis_0, x = attention_6_masked_scaled_scores_0)[name = string("softmax_12")]; + bool attention_6_attention_0_transpose_x_0 = const()[name = string("attention_6_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_6_attention_0_transpose_y_0 = const()[name = string("attention_6_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_6_attention_0 = matmul(transpose_x = attention_6_attention_0_transpose_x_0, transpose_y = attention_6_attention_0_transpose_y_0, x = softmax_12, y = attention_6_slice_value_cache_heads_0)[name = string("attention_6_attention_0")]; + bool attention_6_scores_1_transpose_y_0 = const()[name = string("attention_6_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_6_scores_1_transpose_x_0 = const()[name = string("attention_6_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_6_scores_1 = matmul(transpose_x = attention_6_scores_1_transpose_x_0, transpose_y = attention_6_scores_1_transpose_y_0, x = attention_6_key_cache_head_1, y = attention_6_q_splits_1)[name = string("attention_6_scores_1")]; + fp16 attention_6_scaled_scores_1_y_0 = const()[name = string("attention_6_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_6_scaled_scores_1 = mul(x = attention_6_scores_1, y = attention_6_scaled_scores_1_y_0)[name = string("attention_6_scaled_scores_1")]; + tensor attention_6_masked_scaled_scores_1 = add(x = attention_6_scaled_scores_1, y = transpose_0)[name = string("attention_6_masked_scaled_scores_1")]; + int32 softmax_13_axis_0 = const()[name = string("softmax_13_axis_0"), val = int32(-2)]; + tensor softmax_13 = softmax(axis = softmax_13_axis_0, x = attention_6_masked_scaled_scores_1)[name = string("softmax_13")]; + bool attention_6_attention_1_transpose_x_0 = const()[name = string("attention_6_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_6_attention_1_transpose_y_0 = const()[name = string("attention_6_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_6_attention_1 = matmul(transpose_x = attention_6_attention_1_transpose_x_0, transpose_y = attention_6_attention_1_transpose_y_0, x = softmax_13, y = attention_6_slice_value_cache_heads_1)[name = string("attention_6_attention_1")]; + int32 attention_6_concat_attention_all_heads_axis_0 = const()[name = string("attention_6_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_6_concat_attention_all_heads_interleave_0 = const()[name = string("attention_6_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_6_concat_attention_all_heads = concat(axis = attention_6_concat_attention_all_heads_axis_0, interleave = attention_6_concat_attention_all_heads_interleave_0, values = (attention_6_attention_0, attention_6_attention_1))[name = string("attention_6_concat_attention_all_heads")]; + tensor attention_6_channels_first_retransposed_perm_0 = const()[name = string("attention_6_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_6_reshaped_shape_0 = const()[name = string("attention_6_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_6_channels_first_retransposed = transpose(perm = attention_6_channels_first_retransposed_perm_0, x = attention_6_concat_attention_all_heads)[name = string("transpose_35")]; + tensor attention_6_reshaped = reshape(shape = attention_6_reshaped_shape_0, x = attention_6_channels_first_retransposed)[name = string("attention_6_reshaped")]; + tensor attention_6_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361667840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362270016))))[name = string("attention_6_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_26 = constexpr_blockwise_shift_scale(data = attention_6_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362298752))))[name = string("constexpr_blockwise_shift_scale_26")]; + tensor attention_6_outproj_strides_0 = const()[name = string("attention_6_outproj_strides_0"), val = tensor([1])]; + string attention_6_outproj_pad_type_0 = const()[name = string("attention_6_outproj_pad_type_0"), val = string("valid")]; + tensor attention_6_outproj_pad_0 = const()[name = string("attention_6_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_6_outproj_dilations_0 = const()[name = string("attention_6_outproj_dilations_0"), val = tensor([1])]; + int32 attention_6_outproj_groups_0 = const()[name = string("attention_6_outproj_groups_0"), val = int32(1)]; + tensor attention_6_outproj = conv(dilations = attention_6_outproj_dilations_0, groups = attention_6_outproj_groups_0, pad = attention_6_outproj_pad_0, pad_type = attention_6_outproj_pad_type_0, strides = attention_6_outproj_strides_0, weight = constexpr_blockwise_shift_scale_26, x = attention_6_reshaped)[name = string("attention_6_outproj")]; + tensor block_6_residual_1 = add(x = block_5_residual_2, y = attention_6_outproj)[name = string("block_6_residual_1")]; + tensor block_6_ffn_rmsnorm_abs = abs(x = block_6_residual_1)[name = string("block_6_ffn_rmsnorm_abs")]; + tensor block_6_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_6_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_6_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_6_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_6_ffn_rmsnorm_maxval = reduce_max(axes = block_6_ffn_rmsnorm_maxval_axes_0, keep_dims = block_6_ffn_rmsnorm_maxval_keep_dims_0, x = block_6_ffn_rmsnorm_abs)[name = string("block_6_ffn_rmsnorm_maxval")]; + fp16 block_6_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_6_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_6_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_6_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_6_ffn_rmsnorm_maxval_clipped = clip(alpha = block_6_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_6_ffn_rmsnorm_maxval_clipped_beta_0, x = block_6_ffn_rmsnorm_maxval)[name = string("block_6_ffn_rmsnorm_maxval_clipped")]; + tensor block_6_ffn_rmsnorm_scaled = real_div(x = block_6_residual_1, y = block_6_ffn_rmsnorm_maxval_clipped)[name = string("block_6_ffn_rmsnorm_scaled")]; + tensor block_6_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_6_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_6_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_6_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_6_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_6_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_6_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_6_ffn_rmsnorm_scaled)[name = string("block_6_ffn_rmsnorm_squared_sum")]; + fp16 block_6_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_6_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_6_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_6_ffn_rmsnorm_rsqrt_epsilon_0, x = block_6_ffn_rmsnorm_squared_sum)[name = string("block_6_ffn_rmsnorm_rsqrt")]; + fp16 block_6_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_6_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_6_ffn_rmsnorm_dim_scaled = mul(x = block_6_ffn_rmsnorm_scaled, y = block_6_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_6_ffn_rmsnorm_dim_scaled")]; + tensor block_6_ffn_rmsnorm_normalized = mul(x = block_6_ffn_rmsnorm_dim_scaled, y = block_6_ffn_rmsnorm_rsqrt)[name = string("block_6_ffn_rmsnorm_normalized")]; + tensor block_6_ffn_rmsnorm_y_0 = const()[name = string("block_6_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362300608)))]; + tensor block_6_ffn_rmsnorm = mul(x = block_6_ffn_rmsnorm_normalized, y = block_6_ffn_rmsnorm_y_0)[name = string("block_6_ffn_rmsnorm")]; + tensor block_6_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362302464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365571136))))[name = string("block_6_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_27 = constexpr_blockwise_shift_scale(data = block_6_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365726848))))[name = string("constexpr_blockwise_shift_scale_27")]; + tensor block_6_ffn_inproj_strides_0 = const()[name = string("block_6_ffn_inproj_strides_0"), val = tensor([1])]; + string block_6_ffn_inproj_pad_type_0 = const()[name = string("block_6_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_6_ffn_inproj_pad_0 = const()[name = string("block_6_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_6_ffn_inproj_dilations_0 = const()[name = string("block_6_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_6_ffn_inproj_groups_0 = const()[name = string("block_6_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_6_ffn_inproj = conv(dilations = block_6_ffn_inproj_dilations_0, groups = block_6_ffn_inproj_groups_0, pad = block_6_ffn_inproj_pad_0, pad_type = block_6_ffn_inproj_pad_type_0, strides = block_6_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_27, x = block_6_ffn_rmsnorm)[name = string("block_6_ffn_inproj")]; + tensor block_6_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365736640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369005312))))[name = string("block_6_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_28 = constexpr_blockwise_shift_scale(data = block_6_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369161024))))[name = string("constexpr_blockwise_shift_scale_28")]; + tensor block_6_ffn_g_strides_0 = const()[name = string("block_6_ffn_g_strides_0"), val = tensor([1])]; + string block_6_ffn_g_pad_type_0 = const()[name = string("block_6_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_6_ffn_g_pad_0 = const()[name = string("block_6_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_6_ffn_g_dilations_0 = const()[name = string("block_6_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_6_ffn_g_groups_0 = const()[name = string("block_6_ffn_g_groups_0"), val = int32(1)]; + tensor block_6_ffn_g = conv(dilations = block_6_ffn_g_dilations_0, groups = block_6_ffn_g_groups_0, pad = block_6_ffn_g_pad_0, pad_type = block_6_ffn_g_pad_type_0, strides = block_6_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_28, x = block_6_ffn_rmsnorm)[name = string("block_6_ffn_g")]; + tensor block_6_ffn_g_activation = silu(x = block_6_ffn_g)[name = string("block_6_ffn_g_activation")]; + tensor block_6_ffn_x_gated = mul(x = block_6_ffn_inproj, y = block_6_ffn_g_activation)[name = string("block_6_ffn_x_gated")]; + tensor block_6_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369170816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372439488))))[name = string("block_6_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_29 = constexpr_blockwise_shift_scale(data = block_6_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372468224))))[name = string("constexpr_blockwise_shift_scale_29")]; + tensor block_6_ffn_outproj_strides_0 = const()[name = string("block_6_ffn_outproj_strides_0"), val = tensor([1])]; + string block_6_ffn_outproj_pad_type_0 = const()[name = string("block_6_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_6_ffn_outproj_pad_0 = const()[name = string("block_6_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_6_ffn_outproj_dilations_0 = const()[name = string("block_6_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_6_ffn_outproj_groups_0 = const()[name = string("block_6_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_6_ffn_outproj = conv(dilations = block_6_ffn_outproj_dilations_0, groups = block_6_ffn_outproj_groups_0, pad = block_6_ffn_outproj_pad_0, pad_type = block_6_ffn_outproj_pad_type_0, strides = block_6_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_29, x = block_6_ffn_x_gated)[name = string("block_6_ffn_outproj")]; + tensor block_6_residual_2 = add(x = block_6_ffn_outproj, y = block_6_residual_1)[name = string("block_6_residual_2")]; + tensor block_7_attention_rmsnorm_abs = abs(x = block_6_residual_2)[name = string("block_7_attention_rmsnorm_abs")]; + tensor block_7_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_7_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_7_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_7_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_7_attention_rmsnorm_maxval = reduce_max(axes = block_7_attention_rmsnorm_maxval_axes_0, keep_dims = block_7_attention_rmsnorm_maxval_keep_dims_0, x = block_7_attention_rmsnorm_abs)[name = string("block_7_attention_rmsnorm_maxval")]; + fp16 block_7_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_7_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_7_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_7_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_7_attention_rmsnorm_maxval_clipped = clip(alpha = block_7_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_7_attention_rmsnorm_maxval_clipped_beta_0, x = block_7_attention_rmsnorm_maxval)[name = string("block_7_attention_rmsnorm_maxval_clipped")]; + tensor block_7_attention_rmsnorm_scaled = real_div(x = block_6_residual_2, y = block_7_attention_rmsnorm_maxval_clipped)[name = string("block_7_attention_rmsnorm_scaled")]; + tensor block_7_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_7_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_7_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_7_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_7_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_7_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_7_attention_rmsnorm_squared_sum_keep_dims_0, x = block_7_attention_rmsnorm_scaled)[name = string("block_7_attention_rmsnorm_squared_sum")]; + fp16 block_7_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_7_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_7_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_7_attention_rmsnorm_rsqrt_epsilon_0, x = block_7_attention_rmsnorm_squared_sum)[name = string("block_7_attention_rmsnorm_rsqrt")]; + fp16 block_7_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_7_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_7_attention_rmsnorm_dim_scaled = mul(x = block_7_attention_rmsnorm_scaled, y = block_7_attention_rmsnorm_dim_scaled_y_0)[name = string("block_7_attention_rmsnorm_dim_scaled")]; + tensor block_7_attention_rmsnorm_normalized = mul(x = block_7_attention_rmsnorm_dim_scaled, y = block_7_attention_rmsnorm_rsqrt)[name = string("block_7_attention_rmsnorm_normalized")]; + tensor block_7_attention_rmsnorm_y_0 = const()[name = string("block_7_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372470080)))]; + tensor block_7_attention_rmsnorm = mul(x = block_7_attention_rmsnorm_normalized, y = block_7_attention_rmsnorm_y_0)[name = string("block_7_attention_rmsnorm")]; + tensor attention_7_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372471936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373246144))))[name = string("attention_7_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_30 = constexpr_blockwise_shift_scale(data = attention_7_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373283072))))[name = string("constexpr_blockwise_shift_scale_30")]; + tensor attention_7_qkvproj_bias_0 = const()[name = string("attention_7_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373285440)))]; + tensor attention_7_qkvproj_strides_0 = const()[name = string("attention_7_qkvproj_strides_0"), val = tensor([1])]; + string attention_7_qkvproj_pad_type_0 = const()[name = string("attention_7_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_7_qkvproj_pad_0 = const()[name = string("attention_7_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_7_qkvproj_dilations_0 = const()[name = string("attention_7_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_7_qkvproj_groups_0 = const()[name = string("attention_7_qkvproj_groups_0"), val = int32(1)]; + tensor attention_7_qkvproj = conv(bias = attention_7_qkvproj_bias_0, dilations = attention_7_qkvproj_dilations_0, groups = attention_7_qkvproj_groups_0, pad = attention_7_qkvproj_pad_0, pad_type = attention_7_qkvproj_pad_type_0, strides = attention_7_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_30, x = block_7_attention_rmsnorm)[name = string("attention_7_qkvproj")]; + tensor attention_7_head_reshape_shape_0 = const()[name = string("attention_7_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_7_head_reshape = reshape(shape = attention_7_head_reshape_shape_0, x = attention_7_qkvproj)[name = string("attention_7_head_reshape")]; + tensor attention_7_head_transpose_perm_0 = const()[name = string("attention_7_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_7_split_qkv_heads_axis_0 = const()[name = string("attention_7_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_7_split_qkv_heads_split_sizes_0 = const()[name = string("attention_7_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_7_head_transpose = transpose(perm = attention_7_head_transpose_perm_0, x = attention_7_head_reshape)[name = string("transpose_34")]; + tensor attention_7_split_qkv_heads_0, tensor attention_7_split_qkv_heads_1, tensor attention_7_split_qkv_heads_2 = split(axis = attention_7_split_qkv_heads_axis_0, split_sizes = attention_7_split_qkv_heads_split_sizes_0, x = attention_7_head_transpose)[name = string("attention_7_split_qkv_heads")]; + tensor attention_7_q_rope_lhs_mult = mul(x = attention_7_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_7_q_rope_lhs_mult")]; + int32 attention_7_q_rotate_half_split_num_splits_0 = const()[name = string("attention_7_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_7_q_rotate_half_split_axis_0 = const()[name = string("attention_7_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_7_q_rotate_half_split_0, tensor attention_7_q_rotate_half_split_1 = split(axis = attention_7_q_rotate_half_split_axis_0, num_splits = attention_7_q_rotate_half_split_num_splits_0, x = attention_7_split_qkv_heads_0)[name = string("attention_7_q_rotate_half_split")]; + fp16 attention_7_q_rotate_half_neg_y_0 = const()[name = string("attention_7_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_7_q_rotate_half_neg = mul(x = attention_7_q_rotate_half_split_1, y = attention_7_q_rotate_half_neg_y_0)[name = string("attention_7_q_rotate_half_neg")]; + int32 attention_7_q_rotate_half_concat_axis_0 = const()[name = string("attention_7_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_7_q_rotate_half_concat_interleave_0 = const()[name = string("attention_7_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_7_q_rotate_half_concat = concat(axis = attention_7_q_rotate_half_concat_axis_0, interleave = attention_7_q_rotate_half_concat_interleave_0, values = (attention_7_q_rotate_half_neg, attention_7_q_rotate_half_split_0))[name = string("attention_7_q_rotate_half_concat")]; + tensor attention_7_q_rope_rhs_mult = mul(x = attention_7_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_7_q_rope_rhs_mult")]; + tensor attention_7_q_rope = add(x = attention_7_q_rope_lhs_mult, y = attention_7_q_rope_rhs_mult)[name = string("attention_7_q_rope")]; + tensor attention_7_k_rope_lhs_mult = mul(x = attention_7_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_7_k_rope_lhs_mult")]; + int32 attention_7_k_rotate_half_split_num_splits_0 = const()[name = string("attention_7_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_7_k_rotate_half_split_axis_0 = const()[name = string("attention_7_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_7_k_rotate_half_split_0, tensor attention_7_k_rotate_half_split_1 = split(axis = attention_7_k_rotate_half_split_axis_0, num_splits = attention_7_k_rotate_half_split_num_splits_0, x = attention_7_split_qkv_heads_1)[name = string("attention_7_k_rotate_half_split")]; + fp16 attention_7_k_rotate_half_neg_y_0 = const()[name = string("attention_7_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_7_k_rotate_half_neg = mul(x = attention_7_k_rotate_half_split_1, y = attention_7_k_rotate_half_neg_y_0)[name = string("attention_7_k_rotate_half_neg")]; + int32 attention_7_k_rotate_half_concat_axis_0 = const()[name = string("attention_7_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_7_k_rotate_half_concat_interleave_0 = const()[name = string("attention_7_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_7_k_rotate_half_concat = concat(axis = attention_7_k_rotate_half_concat_axis_0, interleave = attention_7_k_rotate_half_concat_interleave_0, values = (attention_7_k_rotate_half_neg, attention_7_k_rotate_half_split_0))[name = string("attention_7_k_rotate_half_concat")]; + tensor attention_7_k_rope_rhs_mult = mul(x = attention_7_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_7_k_rope_rhs_mult")]; + tensor attention_7_k_rope = add(x = attention_7_k_rope_lhs_mult, y = attention_7_k_rope_rhs_mult)[name = string("attention_7_k_rope")]; + int32 attention_7_q_splits_axis_0 = const()[name = string("attention_7_q_splits_axis_0"), val = int32(1)]; + int32 attention_7_q_splits_num_splits_0 = const()[name = string("attention_7_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_7_q_splits_0, tensor attention_7_q_splits_1 = split(axis = attention_7_q_splits_axis_0, num_splits = attention_7_q_splits_num_splits_0, x = attention_7_q_rope)[name = string("attention_7_q_splits")]; + tensor attention_7_update_begin_0_values0_0 = const()[name = string("attention_7_update_begin_0_values0_0"), val = tensor([7])]; + tensor attention_7_update_begin_0_values1_0 = const()[name = string("attention_7_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_7_update_begin_0_values3_0 = const()[name = string("attention_7_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_7_update_begin_0_axis_0 = const()[name = string("attention_7_update_begin_0_axis_0"), val = int32(0)]; + bool attention_7_update_begin_0_interleave_0 = const()[name = string("attention_7_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_7_update_begin_0 = concat(axis = attention_7_update_begin_0_axis_0, interleave = attention_7_update_begin_0_interleave_0, values = (attention_7_update_begin_0_values0_0, attention_7_update_begin_0_values1_0, query_pos1, attention_7_update_begin_0_values3_0))[name = string("attention_7_update_begin_0")]; + tensor attention_7_update_end_0_values0_0 = const()[name = string("attention_7_update_end_0_values0_0"), val = tensor([8])]; + tensor attention_7_update_end_0_values1_0 = const()[name = string("attention_7_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_7_update_end_0_values3_0 = const()[name = string("attention_7_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_7_update_end_0_axis_0 = const()[name = string("attention_7_update_end_0_axis_0"), val = int32(0)]; + bool attention_7_update_end_0_interleave_0 = const()[name = string("attention_7_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_7_update_end_0 = concat(axis = attention_7_update_end_0_axis_0, interleave = attention_7_update_end_0_interleave_0, values = (attention_7_update_end_0_values0_0, attention_7_update_end_0_values1_0, end_pos_0, attention_7_update_end_0_values3_0))[name = string("attention_7_update_end_0")]; + tensor attention_7_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_7_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_7_updated_key_cache_0 = slice_update(begin = attention_7_update_begin_0, end = attention_7_update_end_0, squeeze_mask = attention_7_updated_key_cache_0_squeeze_mask_0, update = attention_7_k_rope, x = coreml_update_state_12)[name = string("attention_7_updated_key_cache_0")]; + write_state(data = attention_7_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_62_write_state")]; + tensor coreml_update_state_14 = read_state(input = key_cache_state)[name = string("coreml_update_state_62")]; + tensor attention_7_key_cache_begin_0 = const()[name = string("attention_7_key_cache_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor attention_7_key_cache_end_0 = const()[name = string("attention_7_key_cache_end_0"), val = tensor([8, 2, 512, 64])]; + tensor attention_7_key_cache_squeeze_mask_0 = const()[name = string("attention_7_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_7_key_cache = slice_by_index(begin = attention_7_key_cache_begin_0, end = attention_7_key_cache_end_0, squeeze_mask = attention_7_key_cache_squeeze_mask_0, x = coreml_update_state_14)[name = string("attention_7_key_cache")]; + int32 attention_7_key_cache_head_axis_0 = const()[name = string("attention_7_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_7_key_cache_head_num_splits_0 = const()[name = string("attention_7_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_7_key_cache_head_0, tensor attention_7_key_cache_head_1 = split(axis = attention_7_key_cache_head_axis_0, num_splits = attention_7_key_cache_head_num_splits_0, x = attention_7_key_cache)[name = string("attention_7_key_cache_head")]; + tensor attention_7_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_7_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_7_updated_value_cache_0 = slice_update(begin = attention_7_update_begin_0, end = attention_7_update_end_0, squeeze_mask = attention_7_updated_value_cache_0_squeeze_mask_0, update = attention_7_split_qkv_heads_2, x = coreml_update_state_13)[name = string("attention_7_updated_value_cache_0")]; + write_state(data = attention_7_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_63_write_state")]; + tensor coreml_update_state_15 = read_state(input = value_cache_state)[name = string("coreml_update_state_63")]; + tensor attention_7_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_7_slice_current_layer_value_cache_begin_0"), val = tensor([7, 0, 0, 0])]; + tensor attention_7_slice_current_layer_value_cache_end_0 = const()[name = string("attention_7_slice_current_layer_value_cache_end_0"), val = tensor([8, 2, 512, 64])]; + tensor attention_7_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_7_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_7_slice_current_layer_value_cache = slice_by_index(begin = attention_7_slice_current_layer_value_cache_begin_0, end = attention_7_slice_current_layer_value_cache_end_0, squeeze_mask = attention_7_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_15)[name = string("attention_7_slice_current_layer_value_cache")]; + int32 attention_7_slice_value_cache_heads_axis_0 = const()[name = string("attention_7_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_7_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_7_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_7_slice_value_cache_heads_0, tensor attention_7_slice_value_cache_heads_1 = split(axis = attention_7_slice_value_cache_heads_axis_0, num_splits = attention_7_slice_value_cache_heads_num_splits_0, x = attention_7_slice_current_layer_value_cache)[name = string("attention_7_slice_value_cache_heads")]; + bool attention_7_scores_0_transpose_y_0 = const()[name = string("attention_7_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_7_scores_0_transpose_x_0 = const()[name = string("attention_7_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_7_scores_0 = matmul(transpose_x = attention_7_scores_0_transpose_x_0, transpose_y = attention_7_scores_0_transpose_y_0, x = attention_7_key_cache_head_0, y = attention_7_q_splits_0)[name = string("attention_7_scores_0")]; + fp16 attention_7_scaled_scores_0_y_0 = const()[name = string("attention_7_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_7_scaled_scores_0 = mul(x = attention_7_scores_0, y = attention_7_scaled_scores_0_y_0)[name = string("attention_7_scaled_scores_0")]; + tensor attention_7_masked_scaled_scores_0 = add(x = attention_7_scaled_scores_0, y = transpose_0)[name = string("attention_7_masked_scaled_scores_0")]; + int32 softmax_14_axis_0 = const()[name = string("softmax_14_axis_0"), val = int32(-2)]; + tensor softmax_14 = softmax(axis = softmax_14_axis_0, x = attention_7_masked_scaled_scores_0)[name = string("softmax_14")]; + bool attention_7_attention_0_transpose_x_0 = const()[name = string("attention_7_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_7_attention_0_transpose_y_0 = const()[name = string("attention_7_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_7_attention_0 = matmul(transpose_x = attention_7_attention_0_transpose_x_0, transpose_y = attention_7_attention_0_transpose_y_0, x = softmax_14, y = attention_7_slice_value_cache_heads_0)[name = string("attention_7_attention_0")]; + bool attention_7_scores_1_transpose_y_0 = const()[name = string("attention_7_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_7_scores_1_transpose_x_0 = const()[name = string("attention_7_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_7_scores_1 = matmul(transpose_x = attention_7_scores_1_transpose_x_0, transpose_y = attention_7_scores_1_transpose_y_0, x = attention_7_key_cache_head_1, y = attention_7_q_splits_1)[name = string("attention_7_scores_1")]; + fp16 attention_7_scaled_scores_1_y_0 = const()[name = string("attention_7_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_7_scaled_scores_1 = mul(x = attention_7_scores_1, y = attention_7_scaled_scores_1_y_0)[name = string("attention_7_scaled_scores_1")]; + tensor attention_7_masked_scaled_scores_1 = add(x = attention_7_scaled_scores_1, y = transpose_0)[name = string("attention_7_masked_scaled_scores_1")]; + int32 softmax_15_axis_0 = const()[name = string("softmax_15_axis_0"), val = int32(-2)]; + tensor softmax_15 = softmax(axis = softmax_15_axis_0, x = attention_7_masked_scaled_scores_1)[name = string("softmax_15")]; + bool attention_7_attention_1_transpose_x_0 = const()[name = string("attention_7_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_7_attention_1_transpose_y_0 = const()[name = string("attention_7_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_7_attention_1 = matmul(transpose_x = attention_7_attention_1_transpose_x_0, transpose_y = attention_7_attention_1_transpose_y_0, x = softmax_15, y = attention_7_slice_value_cache_heads_1)[name = string("attention_7_attention_1")]; + int32 attention_7_concat_attention_all_heads_axis_0 = const()[name = string("attention_7_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_7_concat_attention_all_heads_interleave_0 = const()[name = string("attention_7_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_7_concat_attention_all_heads = concat(axis = attention_7_concat_attention_all_heads_axis_0, interleave = attention_7_concat_attention_all_heads_interleave_0, values = (attention_7_attention_0, attention_7_attention_1))[name = string("attention_7_concat_attention_all_heads")]; + tensor attention_7_channels_first_retransposed_perm_0 = const()[name = string("attention_7_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_7_reshaped_shape_0 = const()[name = string("attention_7_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_7_channels_first_retransposed = transpose(perm = attention_7_channels_first_retransposed_perm_0, x = attention_7_concat_attention_all_heads)[name = string("transpose_33")]; + tensor attention_7_reshaped = reshape(shape = attention_7_reshaped_shape_0, x = attention_7_channels_first_retransposed)[name = string("attention_7_reshaped")]; + tensor attention_7_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373287808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373889984))))[name = string("attention_7_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_31 = constexpr_blockwise_shift_scale(data = attention_7_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373918720))))[name = string("constexpr_blockwise_shift_scale_31")]; + tensor attention_7_outproj_strides_0 = const()[name = string("attention_7_outproj_strides_0"), val = tensor([1])]; + string attention_7_outproj_pad_type_0 = const()[name = string("attention_7_outproj_pad_type_0"), val = string("valid")]; + tensor attention_7_outproj_pad_0 = const()[name = string("attention_7_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_7_outproj_dilations_0 = const()[name = string("attention_7_outproj_dilations_0"), val = tensor([1])]; + int32 attention_7_outproj_groups_0 = const()[name = string("attention_7_outproj_groups_0"), val = int32(1)]; + tensor attention_7_outproj = conv(dilations = attention_7_outproj_dilations_0, groups = attention_7_outproj_groups_0, pad = attention_7_outproj_pad_0, pad_type = attention_7_outproj_pad_type_0, strides = attention_7_outproj_strides_0, weight = constexpr_blockwise_shift_scale_31, x = attention_7_reshaped)[name = string("attention_7_outproj")]; + tensor block_7_residual_1 = add(x = block_6_residual_2, y = attention_7_outproj)[name = string("block_7_residual_1")]; + tensor block_7_ffn_rmsnorm_abs = abs(x = block_7_residual_1)[name = string("block_7_ffn_rmsnorm_abs")]; + tensor block_7_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_7_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_7_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_7_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_7_ffn_rmsnorm_maxval = reduce_max(axes = block_7_ffn_rmsnorm_maxval_axes_0, keep_dims = block_7_ffn_rmsnorm_maxval_keep_dims_0, x = block_7_ffn_rmsnorm_abs)[name = string("block_7_ffn_rmsnorm_maxval")]; + fp16 block_7_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_7_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_7_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_7_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_7_ffn_rmsnorm_maxval_clipped = clip(alpha = block_7_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_7_ffn_rmsnorm_maxval_clipped_beta_0, x = block_7_ffn_rmsnorm_maxval)[name = string("block_7_ffn_rmsnorm_maxval_clipped")]; + tensor block_7_ffn_rmsnorm_scaled = real_div(x = block_7_residual_1, y = block_7_ffn_rmsnorm_maxval_clipped)[name = string("block_7_ffn_rmsnorm_scaled")]; + tensor block_7_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_7_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_7_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_7_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_7_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_7_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_7_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_7_ffn_rmsnorm_scaled)[name = string("block_7_ffn_rmsnorm_squared_sum")]; + fp16 block_7_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_7_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_7_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_7_ffn_rmsnorm_rsqrt_epsilon_0, x = block_7_ffn_rmsnorm_squared_sum)[name = string("block_7_ffn_rmsnorm_rsqrt")]; + fp16 block_7_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_7_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_7_ffn_rmsnorm_dim_scaled = mul(x = block_7_ffn_rmsnorm_scaled, y = block_7_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_7_ffn_rmsnorm_dim_scaled")]; + tensor block_7_ffn_rmsnorm_normalized = mul(x = block_7_ffn_rmsnorm_dim_scaled, y = block_7_ffn_rmsnorm_rsqrt)[name = string("block_7_ffn_rmsnorm_normalized")]; + tensor block_7_ffn_rmsnorm_y_0 = const()[name = string("block_7_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373920576)))]; + tensor block_7_ffn_rmsnorm = mul(x = block_7_ffn_rmsnorm_normalized, y = block_7_ffn_rmsnorm_y_0)[name = string("block_7_ffn_rmsnorm")]; + tensor block_7_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373922432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377191104))))[name = string("block_7_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_32 = constexpr_blockwise_shift_scale(data = block_7_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377346816))))[name = string("constexpr_blockwise_shift_scale_32")]; + tensor block_7_ffn_inproj_strides_0 = const()[name = string("block_7_ffn_inproj_strides_0"), val = tensor([1])]; + string block_7_ffn_inproj_pad_type_0 = const()[name = string("block_7_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_7_ffn_inproj_pad_0 = const()[name = string("block_7_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_7_ffn_inproj_dilations_0 = const()[name = string("block_7_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_7_ffn_inproj_groups_0 = const()[name = string("block_7_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_7_ffn_inproj = conv(dilations = block_7_ffn_inproj_dilations_0, groups = block_7_ffn_inproj_groups_0, pad = block_7_ffn_inproj_pad_0, pad_type = block_7_ffn_inproj_pad_type_0, strides = block_7_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_32, x = block_7_ffn_rmsnorm)[name = string("block_7_ffn_inproj")]; + tensor block_7_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377356608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380625280))))[name = string("block_7_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_33 = constexpr_blockwise_shift_scale(data = block_7_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380780992))))[name = string("constexpr_blockwise_shift_scale_33")]; + tensor block_7_ffn_g_strides_0 = const()[name = string("block_7_ffn_g_strides_0"), val = tensor([1])]; + string block_7_ffn_g_pad_type_0 = const()[name = string("block_7_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_7_ffn_g_pad_0 = const()[name = string("block_7_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_7_ffn_g_dilations_0 = const()[name = string("block_7_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_7_ffn_g_groups_0 = const()[name = string("block_7_ffn_g_groups_0"), val = int32(1)]; + tensor block_7_ffn_g = conv(dilations = block_7_ffn_g_dilations_0, groups = block_7_ffn_g_groups_0, pad = block_7_ffn_g_pad_0, pad_type = block_7_ffn_g_pad_type_0, strides = block_7_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_33, x = block_7_ffn_rmsnorm)[name = string("block_7_ffn_g")]; + tensor block_7_ffn_g_activation = silu(x = block_7_ffn_g)[name = string("block_7_ffn_g_activation")]; + tensor block_7_ffn_x_gated = mul(x = block_7_ffn_inproj, y = block_7_ffn_g_activation)[name = string("block_7_ffn_x_gated")]; + tensor block_7_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380790784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384059456))))[name = string("block_7_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_34 = constexpr_blockwise_shift_scale(data = block_7_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384088192))))[name = string("constexpr_blockwise_shift_scale_34")]; + tensor block_7_ffn_outproj_strides_0 = const()[name = string("block_7_ffn_outproj_strides_0"), val = tensor([1])]; + string block_7_ffn_outproj_pad_type_0 = const()[name = string("block_7_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_7_ffn_outproj_pad_0 = const()[name = string("block_7_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_7_ffn_outproj_dilations_0 = const()[name = string("block_7_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_7_ffn_outproj_groups_0 = const()[name = string("block_7_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_7_ffn_outproj = conv(dilations = block_7_ffn_outproj_dilations_0, groups = block_7_ffn_outproj_groups_0, pad = block_7_ffn_outproj_pad_0, pad_type = block_7_ffn_outproj_pad_type_0, strides = block_7_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_34, x = block_7_ffn_x_gated)[name = string("block_7_ffn_outproj")]; + tensor block_7_residual_2 = add(x = block_7_ffn_outproj, y = block_7_residual_1)[name = string("block_7_residual_2")]; + tensor block_8_attention_rmsnorm_abs = abs(x = block_7_residual_2)[name = string("block_8_attention_rmsnorm_abs")]; + tensor block_8_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_8_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_8_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_8_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_8_attention_rmsnorm_maxval = reduce_max(axes = block_8_attention_rmsnorm_maxval_axes_0, keep_dims = block_8_attention_rmsnorm_maxval_keep_dims_0, x = block_8_attention_rmsnorm_abs)[name = string("block_8_attention_rmsnorm_maxval")]; + fp16 block_8_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_8_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_8_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_8_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_8_attention_rmsnorm_maxval_clipped = clip(alpha = block_8_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_8_attention_rmsnorm_maxval_clipped_beta_0, x = block_8_attention_rmsnorm_maxval)[name = string("block_8_attention_rmsnorm_maxval_clipped")]; + tensor block_8_attention_rmsnorm_scaled = real_div(x = block_7_residual_2, y = block_8_attention_rmsnorm_maxval_clipped)[name = string("block_8_attention_rmsnorm_scaled")]; + tensor block_8_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_8_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_8_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_8_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_8_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_8_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_8_attention_rmsnorm_squared_sum_keep_dims_0, x = block_8_attention_rmsnorm_scaled)[name = string("block_8_attention_rmsnorm_squared_sum")]; + fp16 block_8_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_8_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_8_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_8_attention_rmsnorm_rsqrt_epsilon_0, x = block_8_attention_rmsnorm_squared_sum)[name = string("block_8_attention_rmsnorm_rsqrt")]; + fp16 block_8_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_8_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_8_attention_rmsnorm_dim_scaled = mul(x = block_8_attention_rmsnorm_scaled, y = block_8_attention_rmsnorm_dim_scaled_y_0)[name = string("block_8_attention_rmsnorm_dim_scaled")]; + tensor block_8_attention_rmsnorm_normalized = mul(x = block_8_attention_rmsnorm_dim_scaled, y = block_8_attention_rmsnorm_rsqrt)[name = string("block_8_attention_rmsnorm_normalized")]; + tensor block_8_attention_rmsnorm_y_0 = const()[name = string("block_8_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384090048)))]; + tensor block_8_attention_rmsnorm = mul(x = block_8_attention_rmsnorm_normalized, y = block_8_attention_rmsnorm_y_0)[name = string("block_8_attention_rmsnorm")]; + tensor attention_8_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384091904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384866112))))[name = string("attention_8_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_35 = constexpr_blockwise_shift_scale(data = attention_8_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384903040))))[name = string("constexpr_blockwise_shift_scale_35")]; + tensor attention_8_qkvproj_bias_0 = const()[name = string("attention_8_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384905408)))]; + tensor attention_8_qkvproj_strides_0 = const()[name = string("attention_8_qkvproj_strides_0"), val = tensor([1])]; + string attention_8_qkvproj_pad_type_0 = const()[name = string("attention_8_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_8_qkvproj_pad_0 = const()[name = string("attention_8_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_8_qkvproj_dilations_0 = const()[name = string("attention_8_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_8_qkvproj_groups_0 = const()[name = string("attention_8_qkvproj_groups_0"), val = int32(1)]; + tensor attention_8_qkvproj = conv(bias = attention_8_qkvproj_bias_0, dilations = attention_8_qkvproj_dilations_0, groups = attention_8_qkvproj_groups_0, pad = attention_8_qkvproj_pad_0, pad_type = attention_8_qkvproj_pad_type_0, strides = attention_8_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_35, x = block_8_attention_rmsnorm)[name = string("attention_8_qkvproj")]; + tensor attention_8_head_reshape_shape_0 = const()[name = string("attention_8_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_8_head_reshape = reshape(shape = attention_8_head_reshape_shape_0, x = attention_8_qkvproj)[name = string("attention_8_head_reshape")]; + tensor attention_8_head_transpose_perm_0 = const()[name = string("attention_8_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_8_split_qkv_heads_axis_0 = const()[name = string("attention_8_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_8_split_qkv_heads_split_sizes_0 = const()[name = string("attention_8_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_8_head_transpose = transpose(perm = attention_8_head_transpose_perm_0, x = attention_8_head_reshape)[name = string("transpose_32")]; + tensor attention_8_split_qkv_heads_0, tensor attention_8_split_qkv_heads_1, tensor attention_8_split_qkv_heads_2 = split(axis = attention_8_split_qkv_heads_axis_0, split_sizes = attention_8_split_qkv_heads_split_sizes_0, x = attention_8_head_transpose)[name = string("attention_8_split_qkv_heads")]; + tensor attention_8_q_rope_lhs_mult = mul(x = attention_8_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_8_q_rope_lhs_mult")]; + int32 attention_8_q_rotate_half_split_num_splits_0 = const()[name = string("attention_8_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_8_q_rotate_half_split_axis_0 = const()[name = string("attention_8_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_8_q_rotate_half_split_0, tensor attention_8_q_rotate_half_split_1 = split(axis = attention_8_q_rotate_half_split_axis_0, num_splits = attention_8_q_rotate_half_split_num_splits_0, x = attention_8_split_qkv_heads_0)[name = string("attention_8_q_rotate_half_split")]; + fp16 attention_8_q_rotate_half_neg_y_0 = const()[name = string("attention_8_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_8_q_rotate_half_neg = mul(x = attention_8_q_rotate_half_split_1, y = attention_8_q_rotate_half_neg_y_0)[name = string("attention_8_q_rotate_half_neg")]; + int32 attention_8_q_rotate_half_concat_axis_0 = const()[name = string("attention_8_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_8_q_rotate_half_concat_interleave_0 = const()[name = string("attention_8_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_8_q_rotate_half_concat = concat(axis = attention_8_q_rotate_half_concat_axis_0, interleave = attention_8_q_rotate_half_concat_interleave_0, values = (attention_8_q_rotate_half_neg, attention_8_q_rotate_half_split_0))[name = string("attention_8_q_rotate_half_concat")]; + tensor attention_8_q_rope_rhs_mult = mul(x = attention_8_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_8_q_rope_rhs_mult")]; + tensor attention_8_q_rope = add(x = attention_8_q_rope_lhs_mult, y = attention_8_q_rope_rhs_mult)[name = string("attention_8_q_rope")]; + tensor attention_8_k_rope_lhs_mult = mul(x = attention_8_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_8_k_rope_lhs_mult")]; + int32 attention_8_k_rotate_half_split_num_splits_0 = const()[name = string("attention_8_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_8_k_rotate_half_split_axis_0 = const()[name = string("attention_8_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_8_k_rotate_half_split_0, tensor attention_8_k_rotate_half_split_1 = split(axis = attention_8_k_rotate_half_split_axis_0, num_splits = attention_8_k_rotate_half_split_num_splits_0, x = attention_8_split_qkv_heads_1)[name = string("attention_8_k_rotate_half_split")]; + fp16 attention_8_k_rotate_half_neg_y_0 = const()[name = string("attention_8_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_8_k_rotate_half_neg = mul(x = attention_8_k_rotate_half_split_1, y = attention_8_k_rotate_half_neg_y_0)[name = string("attention_8_k_rotate_half_neg")]; + int32 attention_8_k_rotate_half_concat_axis_0 = const()[name = string("attention_8_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_8_k_rotate_half_concat_interleave_0 = const()[name = string("attention_8_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_8_k_rotate_half_concat = concat(axis = attention_8_k_rotate_half_concat_axis_0, interleave = attention_8_k_rotate_half_concat_interleave_0, values = (attention_8_k_rotate_half_neg, attention_8_k_rotate_half_split_0))[name = string("attention_8_k_rotate_half_concat")]; + tensor attention_8_k_rope_rhs_mult = mul(x = attention_8_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_8_k_rope_rhs_mult")]; + tensor attention_8_k_rope = add(x = attention_8_k_rope_lhs_mult, y = attention_8_k_rope_rhs_mult)[name = string("attention_8_k_rope")]; + int32 attention_8_q_splits_axis_0 = const()[name = string("attention_8_q_splits_axis_0"), val = int32(1)]; + int32 attention_8_q_splits_num_splits_0 = const()[name = string("attention_8_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_8_q_splits_0, tensor attention_8_q_splits_1 = split(axis = attention_8_q_splits_axis_0, num_splits = attention_8_q_splits_num_splits_0, x = attention_8_q_rope)[name = string("attention_8_q_splits")]; + tensor attention_8_update_begin_0_values0_0 = const()[name = string("attention_8_update_begin_0_values0_0"), val = tensor([8])]; + tensor attention_8_update_begin_0_values1_0 = const()[name = string("attention_8_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_8_update_begin_0_values3_0 = const()[name = string("attention_8_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_8_update_begin_0_axis_0 = const()[name = string("attention_8_update_begin_0_axis_0"), val = int32(0)]; + bool attention_8_update_begin_0_interleave_0 = const()[name = string("attention_8_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_8_update_begin_0 = concat(axis = attention_8_update_begin_0_axis_0, interleave = attention_8_update_begin_0_interleave_0, values = (attention_8_update_begin_0_values0_0, attention_8_update_begin_0_values1_0, query_pos1, attention_8_update_begin_0_values3_0))[name = string("attention_8_update_begin_0")]; + tensor attention_8_update_end_0_values0_0 = const()[name = string("attention_8_update_end_0_values0_0"), val = tensor([9])]; + tensor attention_8_update_end_0_values1_0 = const()[name = string("attention_8_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_8_update_end_0_values3_0 = const()[name = string("attention_8_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_8_update_end_0_axis_0 = const()[name = string("attention_8_update_end_0_axis_0"), val = int32(0)]; + bool attention_8_update_end_0_interleave_0 = const()[name = string("attention_8_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_8_update_end_0 = concat(axis = attention_8_update_end_0_axis_0, interleave = attention_8_update_end_0_interleave_0, values = (attention_8_update_end_0_values0_0, attention_8_update_end_0_values1_0, end_pos_0, attention_8_update_end_0_values3_0))[name = string("attention_8_update_end_0")]; + tensor attention_8_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_8_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_8_updated_key_cache_0 = slice_update(begin = attention_8_update_begin_0, end = attention_8_update_end_0, squeeze_mask = attention_8_updated_key_cache_0_squeeze_mask_0, update = attention_8_k_rope, x = coreml_update_state_14)[name = string("attention_8_updated_key_cache_0")]; + write_state(data = attention_8_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_64_write_state")]; + tensor coreml_update_state_16 = read_state(input = key_cache_state)[name = string("coreml_update_state_64")]; + tensor attention_8_key_cache_begin_0 = const()[name = string("attention_8_key_cache_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor attention_8_key_cache_end_0 = const()[name = string("attention_8_key_cache_end_0"), val = tensor([9, 2, 512, 64])]; + tensor attention_8_key_cache_squeeze_mask_0 = const()[name = string("attention_8_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_8_key_cache = slice_by_index(begin = attention_8_key_cache_begin_0, end = attention_8_key_cache_end_0, squeeze_mask = attention_8_key_cache_squeeze_mask_0, x = coreml_update_state_16)[name = string("attention_8_key_cache")]; + int32 attention_8_key_cache_head_axis_0 = const()[name = string("attention_8_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_8_key_cache_head_num_splits_0 = const()[name = string("attention_8_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_8_key_cache_head_0, tensor attention_8_key_cache_head_1 = split(axis = attention_8_key_cache_head_axis_0, num_splits = attention_8_key_cache_head_num_splits_0, x = attention_8_key_cache)[name = string("attention_8_key_cache_head")]; + tensor attention_8_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_8_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_8_updated_value_cache_0 = slice_update(begin = attention_8_update_begin_0, end = attention_8_update_end_0, squeeze_mask = attention_8_updated_value_cache_0_squeeze_mask_0, update = attention_8_split_qkv_heads_2, x = coreml_update_state_15)[name = string("attention_8_updated_value_cache_0")]; + write_state(data = attention_8_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_65_write_state")]; + tensor coreml_update_state_17 = read_state(input = value_cache_state)[name = string("coreml_update_state_65")]; + tensor attention_8_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_8_slice_current_layer_value_cache_begin_0"), val = tensor([8, 0, 0, 0])]; + tensor attention_8_slice_current_layer_value_cache_end_0 = const()[name = string("attention_8_slice_current_layer_value_cache_end_0"), val = tensor([9, 2, 512, 64])]; + tensor attention_8_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_8_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_8_slice_current_layer_value_cache = slice_by_index(begin = attention_8_slice_current_layer_value_cache_begin_0, end = attention_8_slice_current_layer_value_cache_end_0, squeeze_mask = attention_8_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_17)[name = string("attention_8_slice_current_layer_value_cache")]; + int32 attention_8_slice_value_cache_heads_axis_0 = const()[name = string("attention_8_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_8_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_8_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_8_slice_value_cache_heads_0, tensor attention_8_slice_value_cache_heads_1 = split(axis = attention_8_slice_value_cache_heads_axis_0, num_splits = attention_8_slice_value_cache_heads_num_splits_0, x = attention_8_slice_current_layer_value_cache)[name = string("attention_8_slice_value_cache_heads")]; + bool attention_8_scores_0_transpose_y_0 = const()[name = string("attention_8_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_8_scores_0_transpose_x_0 = const()[name = string("attention_8_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_8_scores_0 = matmul(transpose_x = attention_8_scores_0_transpose_x_0, transpose_y = attention_8_scores_0_transpose_y_0, x = attention_8_key_cache_head_0, y = attention_8_q_splits_0)[name = string("attention_8_scores_0")]; + fp16 attention_8_scaled_scores_0_y_0 = const()[name = string("attention_8_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_8_scaled_scores_0 = mul(x = attention_8_scores_0, y = attention_8_scaled_scores_0_y_0)[name = string("attention_8_scaled_scores_0")]; + tensor attention_8_masked_scaled_scores_0 = add(x = attention_8_scaled_scores_0, y = transpose_0)[name = string("attention_8_masked_scaled_scores_0")]; + int32 softmax_16_axis_0 = const()[name = string("softmax_16_axis_0"), val = int32(-2)]; + tensor softmax_16 = softmax(axis = softmax_16_axis_0, x = attention_8_masked_scaled_scores_0)[name = string("softmax_16")]; + bool attention_8_attention_0_transpose_x_0 = const()[name = string("attention_8_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_8_attention_0_transpose_y_0 = const()[name = string("attention_8_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_8_attention_0 = matmul(transpose_x = attention_8_attention_0_transpose_x_0, transpose_y = attention_8_attention_0_transpose_y_0, x = softmax_16, y = attention_8_slice_value_cache_heads_0)[name = string("attention_8_attention_0")]; + bool attention_8_scores_1_transpose_y_0 = const()[name = string("attention_8_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_8_scores_1_transpose_x_0 = const()[name = string("attention_8_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_8_scores_1 = matmul(transpose_x = attention_8_scores_1_transpose_x_0, transpose_y = attention_8_scores_1_transpose_y_0, x = attention_8_key_cache_head_1, y = attention_8_q_splits_1)[name = string("attention_8_scores_1")]; + fp16 attention_8_scaled_scores_1_y_0 = const()[name = string("attention_8_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_8_scaled_scores_1 = mul(x = attention_8_scores_1, y = attention_8_scaled_scores_1_y_0)[name = string("attention_8_scaled_scores_1")]; + tensor attention_8_masked_scaled_scores_1 = add(x = attention_8_scaled_scores_1, y = transpose_0)[name = string("attention_8_masked_scaled_scores_1")]; + int32 softmax_17_axis_0 = const()[name = string("softmax_17_axis_0"), val = int32(-2)]; + tensor softmax_17 = softmax(axis = softmax_17_axis_0, x = attention_8_masked_scaled_scores_1)[name = string("softmax_17")]; + bool attention_8_attention_1_transpose_x_0 = const()[name = string("attention_8_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_8_attention_1_transpose_y_0 = const()[name = string("attention_8_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_8_attention_1 = matmul(transpose_x = attention_8_attention_1_transpose_x_0, transpose_y = attention_8_attention_1_transpose_y_0, x = softmax_17, y = attention_8_slice_value_cache_heads_1)[name = string("attention_8_attention_1")]; + int32 attention_8_concat_attention_all_heads_axis_0 = const()[name = string("attention_8_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_8_concat_attention_all_heads_interleave_0 = const()[name = string("attention_8_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_8_concat_attention_all_heads = concat(axis = attention_8_concat_attention_all_heads_axis_0, interleave = attention_8_concat_attention_all_heads_interleave_0, values = (attention_8_attention_0, attention_8_attention_1))[name = string("attention_8_concat_attention_all_heads")]; + tensor attention_8_channels_first_retransposed_perm_0 = const()[name = string("attention_8_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_8_reshaped_shape_0 = const()[name = string("attention_8_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_8_channels_first_retransposed = transpose(perm = attention_8_channels_first_retransposed_perm_0, x = attention_8_concat_attention_all_heads)[name = string("transpose_31")]; + tensor attention_8_reshaped = reshape(shape = attention_8_reshaped_shape_0, x = attention_8_channels_first_retransposed)[name = string("attention_8_reshaped")]; + tensor attention_8_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384907776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385509952))))[name = string("attention_8_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_36 = constexpr_blockwise_shift_scale(data = attention_8_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385538688))))[name = string("constexpr_blockwise_shift_scale_36")]; + tensor attention_8_outproj_strides_0 = const()[name = string("attention_8_outproj_strides_0"), val = tensor([1])]; + string attention_8_outproj_pad_type_0 = const()[name = string("attention_8_outproj_pad_type_0"), val = string("valid")]; + tensor attention_8_outproj_pad_0 = const()[name = string("attention_8_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_8_outproj_dilations_0 = const()[name = string("attention_8_outproj_dilations_0"), val = tensor([1])]; + int32 attention_8_outproj_groups_0 = const()[name = string("attention_8_outproj_groups_0"), val = int32(1)]; + tensor attention_8_outproj = conv(dilations = attention_8_outproj_dilations_0, groups = attention_8_outproj_groups_0, pad = attention_8_outproj_pad_0, pad_type = attention_8_outproj_pad_type_0, strides = attention_8_outproj_strides_0, weight = constexpr_blockwise_shift_scale_36, x = attention_8_reshaped)[name = string("attention_8_outproj")]; + tensor block_8_residual_1 = add(x = block_7_residual_2, y = attention_8_outproj)[name = string("block_8_residual_1")]; + tensor block_8_ffn_rmsnorm_abs = abs(x = block_8_residual_1)[name = string("block_8_ffn_rmsnorm_abs")]; + tensor block_8_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_8_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_8_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_8_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_8_ffn_rmsnorm_maxval = reduce_max(axes = block_8_ffn_rmsnorm_maxval_axes_0, keep_dims = block_8_ffn_rmsnorm_maxval_keep_dims_0, x = block_8_ffn_rmsnorm_abs)[name = string("block_8_ffn_rmsnorm_maxval")]; + fp16 block_8_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_8_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_8_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_8_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_8_ffn_rmsnorm_maxval_clipped = clip(alpha = block_8_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_8_ffn_rmsnorm_maxval_clipped_beta_0, x = block_8_ffn_rmsnorm_maxval)[name = string("block_8_ffn_rmsnorm_maxval_clipped")]; + tensor block_8_ffn_rmsnorm_scaled = real_div(x = block_8_residual_1, y = block_8_ffn_rmsnorm_maxval_clipped)[name = string("block_8_ffn_rmsnorm_scaled")]; + tensor block_8_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_8_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_8_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_8_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_8_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_8_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_8_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_8_ffn_rmsnorm_scaled)[name = string("block_8_ffn_rmsnorm_squared_sum")]; + fp16 block_8_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_8_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_8_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_8_ffn_rmsnorm_rsqrt_epsilon_0, x = block_8_ffn_rmsnorm_squared_sum)[name = string("block_8_ffn_rmsnorm_rsqrt")]; + fp16 block_8_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_8_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_8_ffn_rmsnorm_dim_scaled = mul(x = block_8_ffn_rmsnorm_scaled, y = block_8_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_8_ffn_rmsnorm_dim_scaled")]; + tensor block_8_ffn_rmsnorm_normalized = mul(x = block_8_ffn_rmsnorm_dim_scaled, y = block_8_ffn_rmsnorm_rsqrt)[name = string("block_8_ffn_rmsnorm_normalized")]; + tensor block_8_ffn_rmsnorm_y_0 = const()[name = string("block_8_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385540544)))]; + tensor block_8_ffn_rmsnorm = mul(x = block_8_ffn_rmsnorm_normalized, y = block_8_ffn_rmsnorm_y_0)[name = string("block_8_ffn_rmsnorm")]; + tensor block_8_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385542400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388811072))))[name = string("block_8_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_37 = constexpr_blockwise_shift_scale(data = block_8_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388966784))))[name = string("constexpr_blockwise_shift_scale_37")]; + tensor block_8_ffn_inproj_strides_0 = const()[name = string("block_8_ffn_inproj_strides_0"), val = tensor([1])]; + string block_8_ffn_inproj_pad_type_0 = const()[name = string("block_8_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_8_ffn_inproj_pad_0 = const()[name = string("block_8_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_8_ffn_inproj_dilations_0 = const()[name = string("block_8_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_8_ffn_inproj_groups_0 = const()[name = string("block_8_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_8_ffn_inproj = conv(dilations = block_8_ffn_inproj_dilations_0, groups = block_8_ffn_inproj_groups_0, pad = block_8_ffn_inproj_pad_0, pad_type = block_8_ffn_inproj_pad_type_0, strides = block_8_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_37, x = block_8_ffn_rmsnorm)[name = string("block_8_ffn_inproj")]; + tensor block_8_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388976576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392245248))))[name = string("block_8_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_38 = constexpr_blockwise_shift_scale(data = block_8_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392400960))))[name = string("constexpr_blockwise_shift_scale_38")]; + tensor block_8_ffn_g_strides_0 = const()[name = string("block_8_ffn_g_strides_0"), val = tensor([1])]; + string block_8_ffn_g_pad_type_0 = const()[name = string("block_8_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_8_ffn_g_pad_0 = const()[name = string("block_8_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_8_ffn_g_dilations_0 = const()[name = string("block_8_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_8_ffn_g_groups_0 = const()[name = string("block_8_ffn_g_groups_0"), val = int32(1)]; + tensor block_8_ffn_g = conv(dilations = block_8_ffn_g_dilations_0, groups = block_8_ffn_g_groups_0, pad = block_8_ffn_g_pad_0, pad_type = block_8_ffn_g_pad_type_0, strides = block_8_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_38, x = block_8_ffn_rmsnorm)[name = string("block_8_ffn_g")]; + tensor block_8_ffn_g_activation = silu(x = block_8_ffn_g)[name = string("block_8_ffn_g_activation")]; + tensor block_8_ffn_x_gated = mul(x = block_8_ffn_inproj, y = block_8_ffn_g_activation)[name = string("block_8_ffn_x_gated")]; + tensor block_8_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392410752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395679424))))[name = string("block_8_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_39 = constexpr_blockwise_shift_scale(data = block_8_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395708160))))[name = string("constexpr_blockwise_shift_scale_39")]; + tensor block_8_ffn_outproj_strides_0 = const()[name = string("block_8_ffn_outproj_strides_0"), val = tensor([1])]; + string block_8_ffn_outproj_pad_type_0 = const()[name = string("block_8_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_8_ffn_outproj_pad_0 = const()[name = string("block_8_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_8_ffn_outproj_dilations_0 = const()[name = string("block_8_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_8_ffn_outproj_groups_0 = const()[name = string("block_8_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_8_ffn_outproj = conv(dilations = block_8_ffn_outproj_dilations_0, groups = block_8_ffn_outproj_groups_0, pad = block_8_ffn_outproj_pad_0, pad_type = block_8_ffn_outproj_pad_type_0, strides = block_8_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_39, x = block_8_ffn_x_gated)[name = string("block_8_ffn_outproj")]; + tensor block_8_residual_2 = add(x = block_8_ffn_outproj, y = block_8_residual_1)[name = string("block_8_residual_2")]; + tensor block_9_attention_rmsnorm_abs = abs(x = block_8_residual_2)[name = string("block_9_attention_rmsnorm_abs")]; + tensor block_9_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_9_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_9_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_9_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_9_attention_rmsnorm_maxval = reduce_max(axes = block_9_attention_rmsnorm_maxval_axes_0, keep_dims = block_9_attention_rmsnorm_maxval_keep_dims_0, x = block_9_attention_rmsnorm_abs)[name = string("block_9_attention_rmsnorm_maxval")]; + fp16 block_9_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_9_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_9_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_9_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_9_attention_rmsnorm_maxval_clipped = clip(alpha = block_9_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_9_attention_rmsnorm_maxval_clipped_beta_0, x = block_9_attention_rmsnorm_maxval)[name = string("block_9_attention_rmsnorm_maxval_clipped")]; + tensor block_9_attention_rmsnorm_scaled = real_div(x = block_8_residual_2, y = block_9_attention_rmsnorm_maxval_clipped)[name = string("block_9_attention_rmsnorm_scaled")]; + tensor block_9_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_9_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_9_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_9_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_9_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_9_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_9_attention_rmsnorm_squared_sum_keep_dims_0, x = block_9_attention_rmsnorm_scaled)[name = string("block_9_attention_rmsnorm_squared_sum")]; + fp16 block_9_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_9_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_9_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_9_attention_rmsnorm_rsqrt_epsilon_0, x = block_9_attention_rmsnorm_squared_sum)[name = string("block_9_attention_rmsnorm_rsqrt")]; + fp16 block_9_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_9_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_9_attention_rmsnorm_dim_scaled = mul(x = block_9_attention_rmsnorm_scaled, y = block_9_attention_rmsnorm_dim_scaled_y_0)[name = string("block_9_attention_rmsnorm_dim_scaled")]; + tensor block_9_attention_rmsnorm_normalized = mul(x = block_9_attention_rmsnorm_dim_scaled, y = block_9_attention_rmsnorm_rsqrt)[name = string("block_9_attention_rmsnorm_normalized")]; + tensor block_9_attention_rmsnorm_y_0 = const()[name = string("block_9_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395710016)))]; + tensor block_9_attention_rmsnorm = mul(x = block_9_attention_rmsnorm_normalized, y = block_9_attention_rmsnorm_y_0)[name = string("block_9_attention_rmsnorm")]; + tensor attention_9_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395711872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396486080))))[name = string("attention_9_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_40 = constexpr_blockwise_shift_scale(data = attention_9_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396523008))))[name = string("constexpr_blockwise_shift_scale_40")]; + tensor attention_9_qkvproj_bias_0 = const()[name = string("attention_9_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396525376)))]; + tensor attention_9_qkvproj_strides_0 = const()[name = string("attention_9_qkvproj_strides_0"), val = tensor([1])]; + string attention_9_qkvproj_pad_type_0 = const()[name = string("attention_9_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_9_qkvproj_pad_0 = const()[name = string("attention_9_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_9_qkvproj_dilations_0 = const()[name = string("attention_9_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_9_qkvproj_groups_0 = const()[name = string("attention_9_qkvproj_groups_0"), val = int32(1)]; + tensor attention_9_qkvproj = conv(bias = attention_9_qkvproj_bias_0, dilations = attention_9_qkvproj_dilations_0, groups = attention_9_qkvproj_groups_0, pad = attention_9_qkvproj_pad_0, pad_type = attention_9_qkvproj_pad_type_0, strides = attention_9_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_40, x = block_9_attention_rmsnorm)[name = string("attention_9_qkvproj")]; + tensor attention_9_head_reshape_shape_0 = const()[name = string("attention_9_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_9_head_reshape = reshape(shape = attention_9_head_reshape_shape_0, x = attention_9_qkvproj)[name = string("attention_9_head_reshape")]; + tensor attention_9_head_transpose_perm_0 = const()[name = string("attention_9_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_9_split_qkv_heads_axis_0 = const()[name = string("attention_9_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_9_split_qkv_heads_split_sizes_0 = const()[name = string("attention_9_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_9_head_transpose = transpose(perm = attention_9_head_transpose_perm_0, x = attention_9_head_reshape)[name = string("transpose_30")]; + tensor attention_9_split_qkv_heads_0, tensor attention_9_split_qkv_heads_1, tensor attention_9_split_qkv_heads_2 = split(axis = attention_9_split_qkv_heads_axis_0, split_sizes = attention_9_split_qkv_heads_split_sizes_0, x = attention_9_head_transpose)[name = string("attention_9_split_qkv_heads")]; + tensor attention_9_q_rope_lhs_mult = mul(x = attention_9_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_9_q_rope_lhs_mult")]; + int32 attention_9_q_rotate_half_split_num_splits_0 = const()[name = string("attention_9_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_9_q_rotate_half_split_axis_0 = const()[name = string("attention_9_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_9_q_rotate_half_split_0, tensor attention_9_q_rotate_half_split_1 = split(axis = attention_9_q_rotate_half_split_axis_0, num_splits = attention_9_q_rotate_half_split_num_splits_0, x = attention_9_split_qkv_heads_0)[name = string("attention_9_q_rotate_half_split")]; + fp16 attention_9_q_rotate_half_neg_y_0 = const()[name = string("attention_9_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_9_q_rotate_half_neg = mul(x = attention_9_q_rotate_half_split_1, y = attention_9_q_rotate_half_neg_y_0)[name = string("attention_9_q_rotate_half_neg")]; + int32 attention_9_q_rotate_half_concat_axis_0 = const()[name = string("attention_9_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_9_q_rotate_half_concat_interleave_0 = const()[name = string("attention_9_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_9_q_rotate_half_concat = concat(axis = attention_9_q_rotate_half_concat_axis_0, interleave = attention_9_q_rotate_half_concat_interleave_0, values = (attention_9_q_rotate_half_neg, attention_9_q_rotate_half_split_0))[name = string("attention_9_q_rotate_half_concat")]; + tensor attention_9_q_rope_rhs_mult = mul(x = attention_9_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_9_q_rope_rhs_mult")]; + tensor attention_9_q_rope = add(x = attention_9_q_rope_lhs_mult, y = attention_9_q_rope_rhs_mult)[name = string("attention_9_q_rope")]; + tensor attention_9_k_rope_lhs_mult = mul(x = attention_9_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_9_k_rope_lhs_mult")]; + int32 attention_9_k_rotate_half_split_num_splits_0 = const()[name = string("attention_9_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_9_k_rotate_half_split_axis_0 = const()[name = string("attention_9_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_9_k_rotate_half_split_0, tensor attention_9_k_rotate_half_split_1 = split(axis = attention_9_k_rotate_half_split_axis_0, num_splits = attention_9_k_rotate_half_split_num_splits_0, x = attention_9_split_qkv_heads_1)[name = string("attention_9_k_rotate_half_split")]; + fp16 attention_9_k_rotate_half_neg_y_0 = const()[name = string("attention_9_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_9_k_rotate_half_neg = mul(x = attention_9_k_rotate_half_split_1, y = attention_9_k_rotate_half_neg_y_0)[name = string("attention_9_k_rotate_half_neg")]; + int32 attention_9_k_rotate_half_concat_axis_0 = const()[name = string("attention_9_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_9_k_rotate_half_concat_interleave_0 = const()[name = string("attention_9_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_9_k_rotate_half_concat = concat(axis = attention_9_k_rotate_half_concat_axis_0, interleave = attention_9_k_rotate_half_concat_interleave_0, values = (attention_9_k_rotate_half_neg, attention_9_k_rotate_half_split_0))[name = string("attention_9_k_rotate_half_concat")]; + tensor attention_9_k_rope_rhs_mult = mul(x = attention_9_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_9_k_rope_rhs_mult")]; + tensor attention_9_k_rope = add(x = attention_9_k_rope_lhs_mult, y = attention_9_k_rope_rhs_mult)[name = string("attention_9_k_rope")]; + int32 attention_9_q_splits_axis_0 = const()[name = string("attention_9_q_splits_axis_0"), val = int32(1)]; + int32 attention_9_q_splits_num_splits_0 = const()[name = string("attention_9_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_9_q_splits_0, tensor attention_9_q_splits_1 = split(axis = attention_9_q_splits_axis_0, num_splits = attention_9_q_splits_num_splits_0, x = attention_9_q_rope)[name = string("attention_9_q_splits")]; + tensor attention_9_update_begin_0_values0_0 = const()[name = string("attention_9_update_begin_0_values0_0"), val = tensor([9])]; + tensor attention_9_update_begin_0_values1_0 = const()[name = string("attention_9_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_9_update_begin_0_values3_0 = const()[name = string("attention_9_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_9_update_begin_0_axis_0 = const()[name = string("attention_9_update_begin_0_axis_0"), val = int32(0)]; + bool attention_9_update_begin_0_interleave_0 = const()[name = string("attention_9_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_9_update_begin_0 = concat(axis = attention_9_update_begin_0_axis_0, interleave = attention_9_update_begin_0_interleave_0, values = (attention_9_update_begin_0_values0_0, attention_9_update_begin_0_values1_0, query_pos1, attention_9_update_begin_0_values3_0))[name = string("attention_9_update_begin_0")]; + tensor attention_9_update_end_0_values0_0 = const()[name = string("attention_9_update_end_0_values0_0"), val = tensor([10])]; + tensor attention_9_update_end_0_values1_0 = const()[name = string("attention_9_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_9_update_end_0_values3_0 = const()[name = string("attention_9_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_9_update_end_0_axis_0 = const()[name = string("attention_9_update_end_0_axis_0"), val = int32(0)]; + bool attention_9_update_end_0_interleave_0 = const()[name = string("attention_9_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_9_update_end_0 = concat(axis = attention_9_update_end_0_axis_0, interleave = attention_9_update_end_0_interleave_0, values = (attention_9_update_end_0_values0_0, attention_9_update_end_0_values1_0, end_pos_0, attention_9_update_end_0_values3_0))[name = string("attention_9_update_end_0")]; + tensor attention_9_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_9_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_9_updated_key_cache_0 = slice_update(begin = attention_9_update_begin_0, end = attention_9_update_end_0, squeeze_mask = attention_9_updated_key_cache_0_squeeze_mask_0, update = attention_9_k_rope, x = coreml_update_state_16)[name = string("attention_9_updated_key_cache_0")]; + write_state(data = attention_9_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_66_write_state")]; + tensor coreml_update_state_18 = read_state(input = key_cache_state)[name = string("coreml_update_state_66")]; + tensor attention_9_key_cache_begin_0 = const()[name = string("attention_9_key_cache_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor attention_9_key_cache_end_0 = const()[name = string("attention_9_key_cache_end_0"), val = tensor([10, 2, 512, 64])]; + tensor attention_9_key_cache_squeeze_mask_0 = const()[name = string("attention_9_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_9_key_cache = slice_by_index(begin = attention_9_key_cache_begin_0, end = attention_9_key_cache_end_0, squeeze_mask = attention_9_key_cache_squeeze_mask_0, x = coreml_update_state_18)[name = string("attention_9_key_cache")]; + int32 attention_9_key_cache_head_axis_0 = const()[name = string("attention_9_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_9_key_cache_head_num_splits_0 = const()[name = string("attention_9_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_9_key_cache_head_0, tensor attention_9_key_cache_head_1 = split(axis = attention_9_key_cache_head_axis_0, num_splits = attention_9_key_cache_head_num_splits_0, x = attention_9_key_cache)[name = string("attention_9_key_cache_head")]; + tensor attention_9_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_9_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_9_updated_value_cache_0 = slice_update(begin = attention_9_update_begin_0, end = attention_9_update_end_0, squeeze_mask = attention_9_updated_value_cache_0_squeeze_mask_0, update = attention_9_split_qkv_heads_2, x = coreml_update_state_17)[name = string("attention_9_updated_value_cache_0")]; + write_state(data = attention_9_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_67_write_state")]; + tensor coreml_update_state_19 = read_state(input = value_cache_state)[name = string("coreml_update_state_67")]; + tensor attention_9_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_9_slice_current_layer_value_cache_begin_0"), val = tensor([9, 0, 0, 0])]; + tensor attention_9_slice_current_layer_value_cache_end_0 = const()[name = string("attention_9_slice_current_layer_value_cache_end_0"), val = tensor([10, 2, 512, 64])]; + tensor attention_9_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_9_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_9_slice_current_layer_value_cache = slice_by_index(begin = attention_9_slice_current_layer_value_cache_begin_0, end = attention_9_slice_current_layer_value_cache_end_0, squeeze_mask = attention_9_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_19)[name = string("attention_9_slice_current_layer_value_cache")]; + int32 attention_9_slice_value_cache_heads_axis_0 = const()[name = string("attention_9_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_9_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_9_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_9_slice_value_cache_heads_0, tensor attention_9_slice_value_cache_heads_1 = split(axis = attention_9_slice_value_cache_heads_axis_0, num_splits = attention_9_slice_value_cache_heads_num_splits_0, x = attention_9_slice_current_layer_value_cache)[name = string("attention_9_slice_value_cache_heads")]; + bool attention_9_scores_0_transpose_y_0 = const()[name = string("attention_9_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_9_scores_0_transpose_x_0 = const()[name = string("attention_9_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_9_scores_0 = matmul(transpose_x = attention_9_scores_0_transpose_x_0, transpose_y = attention_9_scores_0_transpose_y_0, x = attention_9_key_cache_head_0, y = attention_9_q_splits_0)[name = string("attention_9_scores_0")]; + fp16 attention_9_scaled_scores_0_y_0 = const()[name = string("attention_9_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_9_scaled_scores_0 = mul(x = attention_9_scores_0, y = attention_9_scaled_scores_0_y_0)[name = string("attention_9_scaled_scores_0")]; + tensor attention_9_masked_scaled_scores_0 = add(x = attention_9_scaled_scores_0, y = transpose_0)[name = string("attention_9_masked_scaled_scores_0")]; + int32 softmax_18_axis_0 = const()[name = string("softmax_18_axis_0"), val = int32(-2)]; + tensor softmax_18 = softmax(axis = softmax_18_axis_0, x = attention_9_masked_scaled_scores_0)[name = string("softmax_18")]; + bool attention_9_attention_0_transpose_x_0 = const()[name = string("attention_9_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_9_attention_0_transpose_y_0 = const()[name = string("attention_9_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_9_attention_0 = matmul(transpose_x = attention_9_attention_0_transpose_x_0, transpose_y = attention_9_attention_0_transpose_y_0, x = softmax_18, y = attention_9_slice_value_cache_heads_0)[name = string("attention_9_attention_0")]; + bool attention_9_scores_1_transpose_y_0 = const()[name = string("attention_9_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_9_scores_1_transpose_x_0 = const()[name = string("attention_9_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_9_scores_1 = matmul(transpose_x = attention_9_scores_1_transpose_x_0, transpose_y = attention_9_scores_1_transpose_y_0, x = attention_9_key_cache_head_1, y = attention_9_q_splits_1)[name = string("attention_9_scores_1")]; + fp16 attention_9_scaled_scores_1_y_0 = const()[name = string("attention_9_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_9_scaled_scores_1 = mul(x = attention_9_scores_1, y = attention_9_scaled_scores_1_y_0)[name = string("attention_9_scaled_scores_1")]; + tensor attention_9_masked_scaled_scores_1 = add(x = attention_9_scaled_scores_1, y = transpose_0)[name = string("attention_9_masked_scaled_scores_1")]; + int32 softmax_19_axis_0 = const()[name = string("softmax_19_axis_0"), val = int32(-2)]; + tensor softmax_19 = softmax(axis = softmax_19_axis_0, x = attention_9_masked_scaled_scores_1)[name = string("softmax_19")]; + bool attention_9_attention_1_transpose_x_0 = const()[name = string("attention_9_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_9_attention_1_transpose_y_0 = const()[name = string("attention_9_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_9_attention_1 = matmul(transpose_x = attention_9_attention_1_transpose_x_0, transpose_y = attention_9_attention_1_transpose_y_0, x = softmax_19, y = attention_9_slice_value_cache_heads_1)[name = string("attention_9_attention_1")]; + int32 attention_9_concat_attention_all_heads_axis_0 = const()[name = string("attention_9_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_9_concat_attention_all_heads_interleave_0 = const()[name = string("attention_9_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_9_concat_attention_all_heads = concat(axis = attention_9_concat_attention_all_heads_axis_0, interleave = attention_9_concat_attention_all_heads_interleave_0, values = (attention_9_attention_0, attention_9_attention_1))[name = string("attention_9_concat_attention_all_heads")]; + tensor attention_9_channels_first_retransposed_perm_0 = const()[name = string("attention_9_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_9_reshaped_shape_0 = const()[name = string("attention_9_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_9_channels_first_retransposed = transpose(perm = attention_9_channels_first_retransposed_perm_0, x = attention_9_concat_attention_all_heads)[name = string("transpose_29")]; + tensor attention_9_reshaped = reshape(shape = attention_9_reshaped_shape_0, x = attention_9_channels_first_retransposed)[name = string("attention_9_reshaped")]; + tensor attention_9_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396527744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397129920))))[name = string("attention_9_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_41 = constexpr_blockwise_shift_scale(data = attention_9_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397158656))))[name = string("constexpr_blockwise_shift_scale_41")]; + tensor attention_9_outproj_strides_0 = const()[name = string("attention_9_outproj_strides_0"), val = tensor([1])]; + string attention_9_outproj_pad_type_0 = const()[name = string("attention_9_outproj_pad_type_0"), val = string("valid")]; + tensor attention_9_outproj_pad_0 = const()[name = string("attention_9_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_9_outproj_dilations_0 = const()[name = string("attention_9_outproj_dilations_0"), val = tensor([1])]; + int32 attention_9_outproj_groups_0 = const()[name = string("attention_9_outproj_groups_0"), val = int32(1)]; + tensor attention_9_outproj = conv(dilations = attention_9_outproj_dilations_0, groups = attention_9_outproj_groups_0, pad = attention_9_outproj_pad_0, pad_type = attention_9_outproj_pad_type_0, strides = attention_9_outproj_strides_0, weight = constexpr_blockwise_shift_scale_41, x = attention_9_reshaped)[name = string("attention_9_outproj")]; + tensor block_9_residual_1 = add(x = block_8_residual_2, y = attention_9_outproj)[name = string("block_9_residual_1")]; + tensor block_9_ffn_rmsnorm_abs = abs(x = block_9_residual_1)[name = string("block_9_ffn_rmsnorm_abs")]; + tensor block_9_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_9_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_9_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_9_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_9_ffn_rmsnorm_maxval = reduce_max(axes = block_9_ffn_rmsnorm_maxval_axes_0, keep_dims = block_9_ffn_rmsnorm_maxval_keep_dims_0, x = block_9_ffn_rmsnorm_abs)[name = string("block_9_ffn_rmsnorm_maxval")]; + fp16 block_9_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_9_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_9_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_9_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_9_ffn_rmsnorm_maxval_clipped = clip(alpha = block_9_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_9_ffn_rmsnorm_maxval_clipped_beta_0, x = block_9_ffn_rmsnorm_maxval)[name = string("block_9_ffn_rmsnorm_maxval_clipped")]; + tensor block_9_ffn_rmsnorm_scaled = real_div(x = block_9_residual_1, y = block_9_ffn_rmsnorm_maxval_clipped)[name = string("block_9_ffn_rmsnorm_scaled")]; + tensor block_9_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_9_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_9_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_9_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_9_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_9_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_9_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_9_ffn_rmsnorm_scaled)[name = string("block_9_ffn_rmsnorm_squared_sum")]; + fp16 block_9_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_9_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_9_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_9_ffn_rmsnorm_rsqrt_epsilon_0, x = block_9_ffn_rmsnorm_squared_sum)[name = string("block_9_ffn_rmsnorm_rsqrt")]; + fp16 block_9_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_9_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_9_ffn_rmsnorm_dim_scaled = mul(x = block_9_ffn_rmsnorm_scaled, y = block_9_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_9_ffn_rmsnorm_dim_scaled")]; + tensor block_9_ffn_rmsnorm_normalized = mul(x = block_9_ffn_rmsnorm_dim_scaled, y = block_9_ffn_rmsnorm_rsqrt)[name = string("block_9_ffn_rmsnorm_normalized")]; + tensor block_9_ffn_rmsnorm_y_0 = const()[name = string("block_9_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397160512)))]; + tensor block_9_ffn_rmsnorm = mul(x = block_9_ffn_rmsnorm_normalized, y = block_9_ffn_rmsnorm_y_0)[name = string("block_9_ffn_rmsnorm")]; + tensor block_9_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397162368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400431040))))[name = string("block_9_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_42 = constexpr_blockwise_shift_scale(data = block_9_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400586752))))[name = string("constexpr_blockwise_shift_scale_42")]; + tensor block_9_ffn_inproj_strides_0 = const()[name = string("block_9_ffn_inproj_strides_0"), val = tensor([1])]; + string block_9_ffn_inproj_pad_type_0 = const()[name = string("block_9_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_9_ffn_inproj_pad_0 = const()[name = string("block_9_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_9_ffn_inproj_dilations_0 = const()[name = string("block_9_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_9_ffn_inproj_groups_0 = const()[name = string("block_9_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_9_ffn_inproj = conv(dilations = block_9_ffn_inproj_dilations_0, groups = block_9_ffn_inproj_groups_0, pad = block_9_ffn_inproj_pad_0, pad_type = block_9_ffn_inproj_pad_type_0, strides = block_9_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_42, x = block_9_ffn_rmsnorm)[name = string("block_9_ffn_inproj")]; + tensor block_9_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400596544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403865216))))[name = string("block_9_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_43 = constexpr_blockwise_shift_scale(data = block_9_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404020928))))[name = string("constexpr_blockwise_shift_scale_43")]; + tensor block_9_ffn_g_strides_0 = const()[name = string("block_9_ffn_g_strides_0"), val = tensor([1])]; + string block_9_ffn_g_pad_type_0 = const()[name = string("block_9_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_9_ffn_g_pad_0 = const()[name = string("block_9_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_9_ffn_g_dilations_0 = const()[name = string("block_9_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_9_ffn_g_groups_0 = const()[name = string("block_9_ffn_g_groups_0"), val = int32(1)]; + tensor block_9_ffn_g = conv(dilations = block_9_ffn_g_dilations_0, groups = block_9_ffn_g_groups_0, pad = block_9_ffn_g_pad_0, pad_type = block_9_ffn_g_pad_type_0, strides = block_9_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_43, x = block_9_ffn_rmsnorm)[name = string("block_9_ffn_g")]; + tensor block_9_ffn_g_activation = silu(x = block_9_ffn_g)[name = string("block_9_ffn_g_activation")]; + tensor block_9_ffn_x_gated = mul(x = block_9_ffn_inproj, y = block_9_ffn_g_activation)[name = string("block_9_ffn_x_gated")]; + tensor block_9_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404030720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407299392))))[name = string("block_9_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_44 = constexpr_blockwise_shift_scale(data = block_9_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407328128))))[name = string("constexpr_blockwise_shift_scale_44")]; + tensor block_9_ffn_outproj_strides_0 = const()[name = string("block_9_ffn_outproj_strides_0"), val = tensor([1])]; + string block_9_ffn_outproj_pad_type_0 = const()[name = string("block_9_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_9_ffn_outproj_pad_0 = const()[name = string("block_9_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_9_ffn_outproj_dilations_0 = const()[name = string("block_9_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_9_ffn_outproj_groups_0 = const()[name = string("block_9_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_9_ffn_outproj = conv(dilations = block_9_ffn_outproj_dilations_0, groups = block_9_ffn_outproj_groups_0, pad = block_9_ffn_outproj_pad_0, pad_type = block_9_ffn_outproj_pad_type_0, strides = block_9_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_44, x = block_9_ffn_x_gated)[name = string("block_9_ffn_outproj")]; + tensor block_9_residual_2 = add(x = block_9_ffn_outproj, y = block_9_residual_1)[name = string("block_9_residual_2")]; + tensor block_10_attention_rmsnorm_abs = abs(x = block_9_residual_2)[name = string("block_10_attention_rmsnorm_abs")]; + tensor block_10_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_10_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_10_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_10_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_10_attention_rmsnorm_maxval = reduce_max(axes = block_10_attention_rmsnorm_maxval_axes_0, keep_dims = block_10_attention_rmsnorm_maxval_keep_dims_0, x = block_10_attention_rmsnorm_abs)[name = string("block_10_attention_rmsnorm_maxval")]; + fp16 block_10_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_10_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_10_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_10_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_10_attention_rmsnorm_maxval_clipped = clip(alpha = block_10_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_10_attention_rmsnorm_maxval_clipped_beta_0, x = block_10_attention_rmsnorm_maxval)[name = string("block_10_attention_rmsnorm_maxval_clipped")]; + tensor block_10_attention_rmsnorm_scaled = real_div(x = block_9_residual_2, y = block_10_attention_rmsnorm_maxval_clipped)[name = string("block_10_attention_rmsnorm_scaled")]; + tensor block_10_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_10_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_10_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_10_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_10_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_10_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_10_attention_rmsnorm_squared_sum_keep_dims_0, x = block_10_attention_rmsnorm_scaled)[name = string("block_10_attention_rmsnorm_squared_sum")]; + fp16 block_10_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_10_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_10_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_10_attention_rmsnorm_rsqrt_epsilon_0, x = block_10_attention_rmsnorm_squared_sum)[name = string("block_10_attention_rmsnorm_rsqrt")]; + fp16 block_10_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_10_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_10_attention_rmsnorm_dim_scaled = mul(x = block_10_attention_rmsnorm_scaled, y = block_10_attention_rmsnorm_dim_scaled_y_0)[name = string("block_10_attention_rmsnorm_dim_scaled")]; + tensor block_10_attention_rmsnorm_normalized = mul(x = block_10_attention_rmsnorm_dim_scaled, y = block_10_attention_rmsnorm_rsqrt)[name = string("block_10_attention_rmsnorm_normalized")]; + tensor block_10_attention_rmsnorm_y_0 = const()[name = string("block_10_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407329984)))]; + tensor block_10_attention_rmsnorm = mul(x = block_10_attention_rmsnorm_normalized, y = block_10_attention_rmsnorm_y_0)[name = string("block_10_attention_rmsnorm")]; + tensor attention_10_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407331840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408106048))))[name = string("attention_10_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_45 = constexpr_blockwise_shift_scale(data = attention_10_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408142976))))[name = string("constexpr_blockwise_shift_scale_45")]; + tensor attention_10_qkvproj_bias_0 = const()[name = string("attention_10_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408145344)))]; + tensor attention_10_qkvproj_strides_0 = const()[name = string("attention_10_qkvproj_strides_0"), val = tensor([1])]; + string attention_10_qkvproj_pad_type_0 = const()[name = string("attention_10_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_10_qkvproj_pad_0 = const()[name = string("attention_10_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_10_qkvproj_dilations_0 = const()[name = string("attention_10_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_10_qkvproj_groups_0 = const()[name = string("attention_10_qkvproj_groups_0"), val = int32(1)]; + tensor attention_10_qkvproj = conv(bias = attention_10_qkvproj_bias_0, dilations = attention_10_qkvproj_dilations_0, groups = attention_10_qkvproj_groups_0, pad = attention_10_qkvproj_pad_0, pad_type = attention_10_qkvproj_pad_type_0, strides = attention_10_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_45, x = block_10_attention_rmsnorm)[name = string("attention_10_qkvproj")]; + tensor attention_10_head_reshape_shape_0 = const()[name = string("attention_10_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_10_head_reshape = reshape(shape = attention_10_head_reshape_shape_0, x = attention_10_qkvproj)[name = string("attention_10_head_reshape")]; + tensor attention_10_head_transpose_perm_0 = const()[name = string("attention_10_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_10_split_qkv_heads_axis_0 = const()[name = string("attention_10_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_10_split_qkv_heads_split_sizes_0 = const()[name = string("attention_10_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_10_head_transpose = transpose(perm = attention_10_head_transpose_perm_0, x = attention_10_head_reshape)[name = string("transpose_28")]; + tensor attention_10_split_qkv_heads_0, tensor attention_10_split_qkv_heads_1, tensor attention_10_split_qkv_heads_2 = split(axis = attention_10_split_qkv_heads_axis_0, split_sizes = attention_10_split_qkv_heads_split_sizes_0, x = attention_10_head_transpose)[name = string("attention_10_split_qkv_heads")]; + tensor attention_10_q_rope_lhs_mult = mul(x = attention_10_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_10_q_rope_lhs_mult")]; + int32 attention_10_q_rotate_half_split_num_splits_0 = const()[name = string("attention_10_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_10_q_rotate_half_split_axis_0 = const()[name = string("attention_10_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_10_q_rotate_half_split_0, tensor attention_10_q_rotate_half_split_1 = split(axis = attention_10_q_rotate_half_split_axis_0, num_splits = attention_10_q_rotate_half_split_num_splits_0, x = attention_10_split_qkv_heads_0)[name = string("attention_10_q_rotate_half_split")]; + fp16 attention_10_q_rotate_half_neg_y_0 = const()[name = string("attention_10_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_10_q_rotate_half_neg = mul(x = attention_10_q_rotate_half_split_1, y = attention_10_q_rotate_half_neg_y_0)[name = string("attention_10_q_rotate_half_neg")]; + int32 attention_10_q_rotate_half_concat_axis_0 = const()[name = string("attention_10_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_10_q_rotate_half_concat_interleave_0 = const()[name = string("attention_10_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_10_q_rotate_half_concat = concat(axis = attention_10_q_rotate_half_concat_axis_0, interleave = attention_10_q_rotate_half_concat_interleave_0, values = (attention_10_q_rotate_half_neg, attention_10_q_rotate_half_split_0))[name = string("attention_10_q_rotate_half_concat")]; + tensor attention_10_q_rope_rhs_mult = mul(x = attention_10_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_10_q_rope_rhs_mult")]; + tensor attention_10_q_rope = add(x = attention_10_q_rope_lhs_mult, y = attention_10_q_rope_rhs_mult)[name = string("attention_10_q_rope")]; + tensor attention_10_k_rope_lhs_mult = mul(x = attention_10_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_10_k_rope_lhs_mult")]; + int32 attention_10_k_rotate_half_split_num_splits_0 = const()[name = string("attention_10_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_10_k_rotate_half_split_axis_0 = const()[name = string("attention_10_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_10_k_rotate_half_split_0, tensor attention_10_k_rotate_half_split_1 = split(axis = attention_10_k_rotate_half_split_axis_0, num_splits = attention_10_k_rotate_half_split_num_splits_0, x = attention_10_split_qkv_heads_1)[name = string("attention_10_k_rotate_half_split")]; + fp16 attention_10_k_rotate_half_neg_y_0 = const()[name = string("attention_10_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_10_k_rotate_half_neg = mul(x = attention_10_k_rotate_half_split_1, y = attention_10_k_rotate_half_neg_y_0)[name = string("attention_10_k_rotate_half_neg")]; + int32 attention_10_k_rotate_half_concat_axis_0 = const()[name = string("attention_10_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_10_k_rotate_half_concat_interleave_0 = const()[name = string("attention_10_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_10_k_rotate_half_concat = concat(axis = attention_10_k_rotate_half_concat_axis_0, interleave = attention_10_k_rotate_half_concat_interleave_0, values = (attention_10_k_rotate_half_neg, attention_10_k_rotate_half_split_0))[name = string("attention_10_k_rotate_half_concat")]; + tensor attention_10_k_rope_rhs_mult = mul(x = attention_10_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_10_k_rope_rhs_mult")]; + tensor attention_10_k_rope = add(x = attention_10_k_rope_lhs_mult, y = attention_10_k_rope_rhs_mult)[name = string("attention_10_k_rope")]; + int32 attention_10_q_splits_axis_0 = const()[name = string("attention_10_q_splits_axis_0"), val = int32(1)]; + int32 attention_10_q_splits_num_splits_0 = const()[name = string("attention_10_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_10_q_splits_0, tensor attention_10_q_splits_1 = split(axis = attention_10_q_splits_axis_0, num_splits = attention_10_q_splits_num_splits_0, x = attention_10_q_rope)[name = string("attention_10_q_splits")]; + tensor attention_10_update_begin_0_values0_0 = const()[name = string("attention_10_update_begin_0_values0_0"), val = tensor([10])]; + tensor attention_10_update_begin_0_values1_0 = const()[name = string("attention_10_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_10_update_begin_0_values3_0 = const()[name = string("attention_10_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_10_update_begin_0_axis_0 = const()[name = string("attention_10_update_begin_0_axis_0"), val = int32(0)]; + bool attention_10_update_begin_0_interleave_0 = const()[name = string("attention_10_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_10_update_begin_0 = concat(axis = attention_10_update_begin_0_axis_0, interleave = attention_10_update_begin_0_interleave_0, values = (attention_10_update_begin_0_values0_0, attention_10_update_begin_0_values1_0, query_pos1, attention_10_update_begin_0_values3_0))[name = string("attention_10_update_begin_0")]; + tensor attention_10_update_end_0_values0_0 = const()[name = string("attention_10_update_end_0_values0_0"), val = tensor([11])]; + tensor attention_10_update_end_0_values1_0 = const()[name = string("attention_10_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_10_update_end_0_values3_0 = const()[name = string("attention_10_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_10_update_end_0_axis_0 = const()[name = string("attention_10_update_end_0_axis_0"), val = int32(0)]; + bool attention_10_update_end_0_interleave_0 = const()[name = string("attention_10_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_10_update_end_0 = concat(axis = attention_10_update_end_0_axis_0, interleave = attention_10_update_end_0_interleave_0, values = (attention_10_update_end_0_values0_0, attention_10_update_end_0_values1_0, end_pos_0, attention_10_update_end_0_values3_0))[name = string("attention_10_update_end_0")]; + tensor attention_10_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_10_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_10_updated_key_cache_0 = slice_update(begin = attention_10_update_begin_0, end = attention_10_update_end_0, squeeze_mask = attention_10_updated_key_cache_0_squeeze_mask_0, update = attention_10_k_rope, x = coreml_update_state_18)[name = string("attention_10_updated_key_cache_0")]; + write_state(data = attention_10_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_68_write_state")]; + tensor coreml_update_state_20 = read_state(input = key_cache_state)[name = string("coreml_update_state_68")]; + tensor attention_10_key_cache_begin_0 = const()[name = string("attention_10_key_cache_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor attention_10_key_cache_end_0 = const()[name = string("attention_10_key_cache_end_0"), val = tensor([11, 2, 512, 64])]; + tensor attention_10_key_cache_squeeze_mask_0 = const()[name = string("attention_10_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_10_key_cache = slice_by_index(begin = attention_10_key_cache_begin_0, end = attention_10_key_cache_end_0, squeeze_mask = attention_10_key_cache_squeeze_mask_0, x = coreml_update_state_20)[name = string("attention_10_key_cache")]; + int32 attention_10_key_cache_head_axis_0 = const()[name = string("attention_10_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_10_key_cache_head_num_splits_0 = const()[name = string("attention_10_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_10_key_cache_head_0, tensor attention_10_key_cache_head_1 = split(axis = attention_10_key_cache_head_axis_0, num_splits = attention_10_key_cache_head_num_splits_0, x = attention_10_key_cache)[name = string("attention_10_key_cache_head")]; + tensor attention_10_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_10_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_10_updated_value_cache_0 = slice_update(begin = attention_10_update_begin_0, end = attention_10_update_end_0, squeeze_mask = attention_10_updated_value_cache_0_squeeze_mask_0, update = attention_10_split_qkv_heads_2, x = coreml_update_state_19)[name = string("attention_10_updated_value_cache_0")]; + write_state(data = attention_10_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_69_write_state")]; + tensor coreml_update_state_21 = read_state(input = value_cache_state)[name = string("coreml_update_state_69")]; + tensor attention_10_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_10_slice_current_layer_value_cache_begin_0"), val = tensor([10, 0, 0, 0])]; + tensor attention_10_slice_current_layer_value_cache_end_0 = const()[name = string("attention_10_slice_current_layer_value_cache_end_0"), val = tensor([11, 2, 512, 64])]; + tensor attention_10_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_10_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_10_slice_current_layer_value_cache = slice_by_index(begin = attention_10_slice_current_layer_value_cache_begin_0, end = attention_10_slice_current_layer_value_cache_end_0, squeeze_mask = attention_10_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_21)[name = string("attention_10_slice_current_layer_value_cache")]; + int32 attention_10_slice_value_cache_heads_axis_0 = const()[name = string("attention_10_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_10_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_10_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_10_slice_value_cache_heads_0, tensor attention_10_slice_value_cache_heads_1 = split(axis = attention_10_slice_value_cache_heads_axis_0, num_splits = attention_10_slice_value_cache_heads_num_splits_0, x = attention_10_slice_current_layer_value_cache)[name = string("attention_10_slice_value_cache_heads")]; + bool attention_10_scores_0_transpose_y_0 = const()[name = string("attention_10_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_10_scores_0_transpose_x_0 = const()[name = string("attention_10_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_10_scores_0 = matmul(transpose_x = attention_10_scores_0_transpose_x_0, transpose_y = attention_10_scores_0_transpose_y_0, x = attention_10_key_cache_head_0, y = attention_10_q_splits_0)[name = string("attention_10_scores_0")]; + fp16 attention_10_scaled_scores_0_y_0 = const()[name = string("attention_10_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_10_scaled_scores_0 = mul(x = attention_10_scores_0, y = attention_10_scaled_scores_0_y_0)[name = string("attention_10_scaled_scores_0")]; + tensor attention_10_masked_scaled_scores_0 = add(x = attention_10_scaled_scores_0, y = transpose_0)[name = string("attention_10_masked_scaled_scores_0")]; + int32 softmax_20_axis_0 = const()[name = string("softmax_20_axis_0"), val = int32(-2)]; + tensor softmax_20 = softmax(axis = softmax_20_axis_0, x = attention_10_masked_scaled_scores_0)[name = string("softmax_20")]; + bool attention_10_attention_0_transpose_x_0 = const()[name = string("attention_10_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_10_attention_0_transpose_y_0 = const()[name = string("attention_10_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_10_attention_0 = matmul(transpose_x = attention_10_attention_0_transpose_x_0, transpose_y = attention_10_attention_0_transpose_y_0, x = softmax_20, y = attention_10_slice_value_cache_heads_0)[name = string("attention_10_attention_0")]; + bool attention_10_scores_1_transpose_y_0 = const()[name = string("attention_10_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_10_scores_1_transpose_x_0 = const()[name = string("attention_10_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_10_scores_1 = matmul(transpose_x = attention_10_scores_1_transpose_x_0, transpose_y = attention_10_scores_1_transpose_y_0, x = attention_10_key_cache_head_1, y = attention_10_q_splits_1)[name = string("attention_10_scores_1")]; + fp16 attention_10_scaled_scores_1_y_0 = const()[name = string("attention_10_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_10_scaled_scores_1 = mul(x = attention_10_scores_1, y = attention_10_scaled_scores_1_y_0)[name = string("attention_10_scaled_scores_1")]; + tensor attention_10_masked_scaled_scores_1 = add(x = attention_10_scaled_scores_1, y = transpose_0)[name = string("attention_10_masked_scaled_scores_1")]; + int32 softmax_21_axis_0 = const()[name = string("softmax_21_axis_0"), val = int32(-2)]; + tensor softmax_21 = softmax(axis = softmax_21_axis_0, x = attention_10_masked_scaled_scores_1)[name = string("softmax_21")]; + bool attention_10_attention_1_transpose_x_0 = const()[name = string("attention_10_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_10_attention_1_transpose_y_0 = const()[name = string("attention_10_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_10_attention_1 = matmul(transpose_x = attention_10_attention_1_transpose_x_0, transpose_y = attention_10_attention_1_transpose_y_0, x = softmax_21, y = attention_10_slice_value_cache_heads_1)[name = string("attention_10_attention_1")]; + int32 attention_10_concat_attention_all_heads_axis_0 = const()[name = string("attention_10_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_10_concat_attention_all_heads_interleave_0 = const()[name = string("attention_10_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_10_concat_attention_all_heads = concat(axis = attention_10_concat_attention_all_heads_axis_0, interleave = attention_10_concat_attention_all_heads_interleave_0, values = (attention_10_attention_0, attention_10_attention_1))[name = string("attention_10_concat_attention_all_heads")]; + tensor attention_10_channels_first_retransposed_perm_0 = const()[name = string("attention_10_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_10_reshaped_shape_0 = const()[name = string("attention_10_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_10_channels_first_retransposed = transpose(perm = attention_10_channels_first_retransposed_perm_0, x = attention_10_concat_attention_all_heads)[name = string("transpose_27")]; + tensor attention_10_reshaped = reshape(shape = attention_10_reshaped_shape_0, x = attention_10_channels_first_retransposed)[name = string("attention_10_reshaped")]; + tensor attention_10_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408147712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408749888))))[name = string("attention_10_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_46 = constexpr_blockwise_shift_scale(data = attention_10_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408778624))))[name = string("constexpr_blockwise_shift_scale_46")]; + tensor attention_10_outproj_strides_0 = const()[name = string("attention_10_outproj_strides_0"), val = tensor([1])]; + string attention_10_outproj_pad_type_0 = const()[name = string("attention_10_outproj_pad_type_0"), val = string("valid")]; + tensor attention_10_outproj_pad_0 = const()[name = string("attention_10_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_10_outproj_dilations_0 = const()[name = string("attention_10_outproj_dilations_0"), val = tensor([1])]; + int32 attention_10_outproj_groups_0 = const()[name = string("attention_10_outproj_groups_0"), val = int32(1)]; + tensor attention_10_outproj = conv(dilations = attention_10_outproj_dilations_0, groups = attention_10_outproj_groups_0, pad = attention_10_outproj_pad_0, pad_type = attention_10_outproj_pad_type_0, strides = attention_10_outproj_strides_0, weight = constexpr_blockwise_shift_scale_46, x = attention_10_reshaped)[name = string("attention_10_outproj")]; + tensor block_10_residual_1 = add(x = block_9_residual_2, y = attention_10_outproj)[name = string("block_10_residual_1")]; + tensor block_10_ffn_rmsnorm_abs = abs(x = block_10_residual_1)[name = string("block_10_ffn_rmsnorm_abs")]; + tensor block_10_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_10_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_10_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_10_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_10_ffn_rmsnorm_maxval = reduce_max(axes = block_10_ffn_rmsnorm_maxval_axes_0, keep_dims = block_10_ffn_rmsnorm_maxval_keep_dims_0, x = block_10_ffn_rmsnorm_abs)[name = string("block_10_ffn_rmsnorm_maxval")]; + fp16 block_10_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_10_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_10_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_10_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_10_ffn_rmsnorm_maxval_clipped = clip(alpha = block_10_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_10_ffn_rmsnorm_maxval_clipped_beta_0, x = block_10_ffn_rmsnorm_maxval)[name = string("block_10_ffn_rmsnorm_maxval_clipped")]; + tensor block_10_ffn_rmsnorm_scaled = real_div(x = block_10_residual_1, y = block_10_ffn_rmsnorm_maxval_clipped)[name = string("block_10_ffn_rmsnorm_scaled")]; + tensor block_10_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_10_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_10_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_10_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_10_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_10_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_10_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_10_ffn_rmsnorm_scaled)[name = string("block_10_ffn_rmsnorm_squared_sum")]; + fp16 block_10_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_10_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_10_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_10_ffn_rmsnorm_rsqrt_epsilon_0, x = block_10_ffn_rmsnorm_squared_sum)[name = string("block_10_ffn_rmsnorm_rsqrt")]; + fp16 block_10_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_10_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_10_ffn_rmsnorm_dim_scaled = mul(x = block_10_ffn_rmsnorm_scaled, y = block_10_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_10_ffn_rmsnorm_dim_scaled")]; + tensor block_10_ffn_rmsnorm_normalized = mul(x = block_10_ffn_rmsnorm_dim_scaled, y = block_10_ffn_rmsnorm_rsqrt)[name = string("block_10_ffn_rmsnorm_normalized")]; + tensor block_10_ffn_rmsnorm_y_0 = const()[name = string("block_10_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408780480)))]; + tensor block_10_ffn_rmsnorm = mul(x = block_10_ffn_rmsnorm_normalized, y = block_10_ffn_rmsnorm_y_0)[name = string("block_10_ffn_rmsnorm")]; + tensor block_10_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408782336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412051008))))[name = string("block_10_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_47 = constexpr_blockwise_shift_scale(data = block_10_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412206720))))[name = string("constexpr_blockwise_shift_scale_47")]; + tensor block_10_ffn_inproj_strides_0 = const()[name = string("block_10_ffn_inproj_strides_0"), val = tensor([1])]; + string block_10_ffn_inproj_pad_type_0 = const()[name = string("block_10_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_10_ffn_inproj_pad_0 = const()[name = string("block_10_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_10_ffn_inproj_dilations_0 = const()[name = string("block_10_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_10_ffn_inproj_groups_0 = const()[name = string("block_10_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_10_ffn_inproj = conv(dilations = block_10_ffn_inproj_dilations_0, groups = block_10_ffn_inproj_groups_0, pad = block_10_ffn_inproj_pad_0, pad_type = block_10_ffn_inproj_pad_type_0, strides = block_10_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_47, x = block_10_ffn_rmsnorm)[name = string("block_10_ffn_inproj")]; + tensor block_10_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412216512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415485184))))[name = string("block_10_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_48 = constexpr_blockwise_shift_scale(data = block_10_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415640896))))[name = string("constexpr_blockwise_shift_scale_48")]; + tensor block_10_ffn_g_strides_0 = const()[name = string("block_10_ffn_g_strides_0"), val = tensor([1])]; + string block_10_ffn_g_pad_type_0 = const()[name = string("block_10_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_10_ffn_g_pad_0 = const()[name = string("block_10_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_10_ffn_g_dilations_0 = const()[name = string("block_10_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_10_ffn_g_groups_0 = const()[name = string("block_10_ffn_g_groups_0"), val = int32(1)]; + tensor block_10_ffn_g = conv(dilations = block_10_ffn_g_dilations_0, groups = block_10_ffn_g_groups_0, pad = block_10_ffn_g_pad_0, pad_type = block_10_ffn_g_pad_type_0, strides = block_10_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_48, x = block_10_ffn_rmsnorm)[name = string("block_10_ffn_g")]; + tensor block_10_ffn_g_activation = silu(x = block_10_ffn_g)[name = string("block_10_ffn_g_activation")]; + tensor block_10_ffn_x_gated = mul(x = block_10_ffn_inproj, y = block_10_ffn_g_activation)[name = string("block_10_ffn_x_gated")]; + tensor block_10_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415650688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418919360))))[name = string("block_10_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_49 = constexpr_blockwise_shift_scale(data = block_10_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418948096))))[name = string("constexpr_blockwise_shift_scale_49")]; + tensor block_10_ffn_outproj_strides_0 = const()[name = string("block_10_ffn_outproj_strides_0"), val = tensor([1])]; + string block_10_ffn_outproj_pad_type_0 = const()[name = string("block_10_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_10_ffn_outproj_pad_0 = const()[name = string("block_10_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_10_ffn_outproj_dilations_0 = const()[name = string("block_10_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_10_ffn_outproj_groups_0 = const()[name = string("block_10_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_10_ffn_outproj = conv(dilations = block_10_ffn_outproj_dilations_0, groups = block_10_ffn_outproj_groups_0, pad = block_10_ffn_outproj_pad_0, pad_type = block_10_ffn_outproj_pad_type_0, strides = block_10_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_49, x = block_10_ffn_x_gated)[name = string("block_10_ffn_outproj")]; + tensor block_10_residual_2 = add(x = block_10_ffn_outproj, y = block_10_residual_1)[name = string("block_10_residual_2")]; + tensor block_11_attention_rmsnorm_abs = abs(x = block_10_residual_2)[name = string("block_11_attention_rmsnorm_abs")]; + tensor block_11_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_11_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_11_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_11_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_11_attention_rmsnorm_maxval = reduce_max(axes = block_11_attention_rmsnorm_maxval_axes_0, keep_dims = block_11_attention_rmsnorm_maxval_keep_dims_0, x = block_11_attention_rmsnorm_abs)[name = string("block_11_attention_rmsnorm_maxval")]; + fp16 block_11_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_11_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_11_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_11_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_11_attention_rmsnorm_maxval_clipped = clip(alpha = block_11_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_11_attention_rmsnorm_maxval_clipped_beta_0, x = block_11_attention_rmsnorm_maxval)[name = string("block_11_attention_rmsnorm_maxval_clipped")]; + tensor block_11_attention_rmsnorm_scaled = real_div(x = block_10_residual_2, y = block_11_attention_rmsnorm_maxval_clipped)[name = string("block_11_attention_rmsnorm_scaled")]; + tensor block_11_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_11_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_11_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_11_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_11_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_11_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_11_attention_rmsnorm_squared_sum_keep_dims_0, x = block_11_attention_rmsnorm_scaled)[name = string("block_11_attention_rmsnorm_squared_sum")]; + fp16 block_11_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_11_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_11_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_11_attention_rmsnorm_rsqrt_epsilon_0, x = block_11_attention_rmsnorm_squared_sum)[name = string("block_11_attention_rmsnorm_rsqrt")]; + fp16 block_11_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_11_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_11_attention_rmsnorm_dim_scaled = mul(x = block_11_attention_rmsnorm_scaled, y = block_11_attention_rmsnorm_dim_scaled_y_0)[name = string("block_11_attention_rmsnorm_dim_scaled")]; + tensor block_11_attention_rmsnorm_normalized = mul(x = block_11_attention_rmsnorm_dim_scaled, y = block_11_attention_rmsnorm_rsqrt)[name = string("block_11_attention_rmsnorm_normalized")]; + tensor block_11_attention_rmsnorm_y_0 = const()[name = string("block_11_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418949952)))]; + tensor block_11_attention_rmsnorm = mul(x = block_11_attention_rmsnorm_normalized, y = block_11_attention_rmsnorm_y_0)[name = string("block_11_attention_rmsnorm")]; + tensor attention_11_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418951808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419726016))))[name = string("attention_11_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_50 = constexpr_blockwise_shift_scale(data = attention_11_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419762944))))[name = string("constexpr_blockwise_shift_scale_50")]; + tensor attention_11_qkvproj_bias_0 = const()[name = string("attention_11_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419765312)))]; + tensor attention_11_qkvproj_strides_0 = const()[name = string("attention_11_qkvproj_strides_0"), val = tensor([1])]; + string attention_11_qkvproj_pad_type_0 = const()[name = string("attention_11_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_11_qkvproj_pad_0 = const()[name = string("attention_11_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_11_qkvproj_dilations_0 = const()[name = string("attention_11_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_11_qkvproj_groups_0 = const()[name = string("attention_11_qkvproj_groups_0"), val = int32(1)]; + tensor attention_11_qkvproj = conv(bias = attention_11_qkvproj_bias_0, dilations = attention_11_qkvproj_dilations_0, groups = attention_11_qkvproj_groups_0, pad = attention_11_qkvproj_pad_0, pad_type = attention_11_qkvproj_pad_type_0, strides = attention_11_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_50, x = block_11_attention_rmsnorm)[name = string("attention_11_qkvproj")]; + tensor attention_11_head_reshape_shape_0 = const()[name = string("attention_11_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_11_head_reshape = reshape(shape = attention_11_head_reshape_shape_0, x = attention_11_qkvproj)[name = string("attention_11_head_reshape")]; + tensor attention_11_head_transpose_perm_0 = const()[name = string("attention_11_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_11_split_qkv_heads_axis_0 = const()[name = string("attention_11_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_11_split_qkv_heads_split_sizes_0 = const()[name = string("attention_11_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_11_head_transpose = transpose(perm = attention_11_head_transpose_perm_0, x = attention_11_head_reshape)[name = string("transpose_26")]; + tensor attention_11_split_qkv_heads_0, tensor attention_11_split_qkv_heads_1, tensor attention_11_split_qkv_heads_2 = split(axis = attention_11_split_qkv_heads_axis_0, split_sizes = attention_11_split_qkv_heads_split_sizes_0, x = attention_11_head_transpose)[name = string("attention_11_split_qkv_heads")]; + tensor attention_11_q_rope_lhs_mult = mul(x = attention_11_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_11_q_rope_lhs_mult")]; + int32 attention_11_q_rotate_half_split_num_splits_0 = const()[name = string("attention_11_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_11_q_rotate_half_split_axis_0 = const()[name = string("attention_11_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_11_q_rotate_half_split_0, tensor attention_11_q_rotate_half_split_1 = split(axis = attention_11_q_rotate_half_split_axis_0, num_splits = attention_11_q_rotate_half_split_num_splits_0, x = attention_11_split_qkv_heads_0)[name = string("attention_11_q_rotate_half_split")]; + fp16 attention_11_q_rotate_half_neg_y_0 = const()[name = string("attention_11_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_11_q_rotate_half_neg = mul(x = attention_11_q_rotate_half_split_1, y = attention_11_q_rotate_half_neg_y_0)[name = string("attention_11_q_rotate_half_neg")]; + int32 attention_11_q_rotate_half_concat_axis_0 = const()[name = string("attention_11_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_11_q_rotate_half_concat_interleave_0 = const()[name = string("attention_11_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_11_q_rotate_half_concat = concat(axis = attention_11_q_rotate_half_concat_axis_0, interleave = attention_11_q_rotate_half_concat_interleave_0, values = (attention_11_q_rotate_half_neg, attention_11_q_rotate_half_split_0))[name = string("attention_11_q_rotate_half_concat")]; + tensor attention_11_q_rope_rhs_mult = mul(x = attention_11_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_11_q_rope_rhs_mult")]; + tensor attention_11_q_rope = add(x = attention_11_q_rope_lhs_mult, y = attention_11_q_rope_rhs_mult)[name = string("attention_11_q_rope")]; + tensor attention_11_k_rope_lhs_mult = mul(x = attention_11_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_11_k_rope_lhs_mult")]; + int32 attention_11_k_rotate_half_split_num_splits_0 = const()[name = string("attention_11_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_11_k_rotate_half_split_axis_0 = const()[name = string("attention_11_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_11_k_rotate_half_split_0, tensor attention_11_k_rotate_half_split_1 = split(axis = attention_11_k_rotate_half_split_axis_0, num_splits = attention_11_k_rotate_half_split_num_splits_0, x = attention_11_split_qkv_heads_1)[name = string("attention_11_k_rotate_half_split")]; + fp16 attention_11_k_rotate_half_neg_y_0 = const()[name = string("attention_11_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_11_k_rotate_half_neg = mul(x = attention_11_k_rotate_half_split_1, y = attention_11_k_rotate_half_neg_y_0)[name = string("attention_11_k_rotate_half_neg")]; + int32 attention_11_k_rotate_half_concat_axis_0 = const()[name = string("attention_11_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_11_k_rotate_half_concat_interleave_0 = const()[name = string("attention_11_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_11_k_rotate_half_concat = concat(axis = attention_11_k_rotate_half_concat_axis_0, interleave = attention_11_k_rotate_half_concat_interleave_0, values = (attention_11_k_rotate_half_neg, attention_11_k_rotate_half_split_0))[name = string("attention_11_k_rotate_half_concat")]; + tensor attention_11_k_rope_rhs_mult = mul(x = attention_11_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_11_k_rope_rhs_mult")]; + tensor attention_11_k_rope = add(x = attention_11_k_rope_lhs_mult, y = attention_11_k_rope_rhs_mult)[name = string("attention_11_k_rope")]; + int32 attention_11_q_splits_axis_0 = const()[name = string("attention_11_q_splits_axis_0"), val = int32(1)]; + int32 attention_11_q_splits_num_splits_0 = const()[name = string("attention_11_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_11_q_splits_0, tensor attention_11_q_splits_1 = split(axis = attention_11_q_splits_axis_0, num_splits = attention_11_q_splits_num_splits_0, x = attention_11_q_rope)[name = string("attention_11_q_splits")]; + tensor attention_11_update_begin_0_values0_0 = const()[name = string("attention_11_update_begin_0_values0_0"), val = tensor([11])]; + tensor attention_11_update_begin_0_values1_0 = const()[name = string("attention_11_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_11_update_begin_0_values3_0 = const()[name = string("attention_11_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_11_update_begin_0_axis_0 = const()[name = string("attention_11_update_begin_0_axis_0"), val = int32(0)]; + bool attention_11_update_begin_0_interleave_0 = const()[name = string("attention_11_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_11_update_begin_0 = concat(axis = attention_11_update_begin_0_axis_0, interleave = attention_11_update_begin_0_interleave_0, values = (attention_11_update_begin_0_values0_0, attention_11_update_begin_0_values1_0, query_pos1, attention_11_update_begin_0_values3_0))[name = string("attention_11_update_begin_0")]; + tensor attention_11_update_end_0_values0_0 = const()[name = string("attention_11_update_end_0_values0_0"), val = tensor([12])]; + tensor attention_11_update_end_0_values1_0 = const()[name = string("attention_11_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_11_update_end_0_values3_0 = const()[name = string("attention_11_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_11_update_end_0_axis_0 = const()[name = string("attention_11_update_end_0_axis_0"), val = int32(0)]; + bool attention_11_update_end_0_interleave_0 = const()[name = string("attention_11_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_11_update_end_0 = concat(axis = attention_11_update_end_0_axis_0, interleave = attention_11_update_end_0_interleave_0, values = (attention_11_update_end_0_values0_0, attention_11_update_end_0_values1_0, end_pos_0, attention_11_update_end_0_values3_0))[name = string("attention_11_update_end_0")]; + tensor attention_11_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_11_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_11_updated_key_cache_0 = slice_update(begin = attention_11_update_begin_0, end = attention_11_update_end_0, squeeze_mask = attention_11_updated_key_cache_0_squeeze_mask_0, update = attention_11_k_rope, x = coreml_update_state_20)[name = string("attention_11_updated_key_cache_0")]; + write_state(data = attention_11_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_70_write_state")]; + tensor coreml_update_state_22 = read_state(input = key_cache_state)[name = string("coreml_update_state_70")]; + tensor attention_11_key_cache_begin_0 = const()[name = string("attention_11_key_cache_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor attention_11_key_cache_end_0 = const()[name = string("attention_11_key_cache_end_0"), val = tensor([12, 2, 512, 64])]; + tensor attention_11_key_cache_squeeze_mask_0 = const()[name = string("attention_11_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_11_key_cache = slice_by_index(begin = attention_11_key_cache_begin_0, end = attention_11_key_cache_end_0, squeeze_mask = attention_11_key_cache_squeeze_mask_0, x = coreml_update_state_22)[name = string("attention_11_key_cache")]; + int32 attention_11_key_cache_head_axis_0 = const()[name = string("attention_11_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_11_key_cache_head_num_splits_0 = const()[name = string("attention_11_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_11_key_cache_head_0, tensor attention_11_key_cache_head_1 = split(axis = attention_11_key_cache_head_axis_0, num_splits = attention_11_key_cache_head_num_splits_0, x = attention_11_key_cache)[name = string("attention_11_key_cache_head")]; + tensor attention_11_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_11_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_11_updated_value_cache_0 = slice_update(begin = attention_11_update_begin_0, end = attention_11_update_end_0, squeeze_mask = attention_11_updated_value_cache_0_squeeze_mask_0, update = attention_11_split_qkv_heads_2, x = coreml_update_state_21)[name = string("attention_11_updated_value_cache_0")]; + write_state(data = attention_11_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_71_write_state")]; + tensor coreml_update_state_23 = read_state(input = value_cache_state)[name = string("coreml_update_state_71")]; + tensor attention_11_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_11_slice_current_layer_value_cache_begin_0"), val = tensor([11, 0, 0, 0])]; + tensor attention_11_slice_current_layer_value_cache_end_0 = const()[name = string("attention_11_slice_current_layer_value_cache_end_0"), val = tensor([12, 2, 512, 64])]; + tensor attention_11_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_11_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_11_slice_current_layer_value_cache = slice_by_index(begin = attention_11_slice_current_layer_value_cache_begin_0, end = attention_11_slice_current_layer_value_cache_end_0, squeeze_mask = attention_11_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_23)[name = string("attention_11_slice_current_layer_value_cache")]; + int32 attention_11_slice_value_cache_heads_axis_0 = const()[name = string("attention_11_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_11_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_11_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_11_slice_value_cache_heads_0, tensor attention_11_slice_value_cache_heads_1 = split(axis = attention_11_slice_value_cache_heads_axis_0, num_splits = attention_11_slice_value_cache_heads_num_splits_0, x = attention_11_slice_current_layer_value_cache)[name = string("attention_11_slice_value_cache_heads")]; + bool attention_11_scores_0_transpose_y_0 = const()[name = string("attention_11_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_11_scores_0_transpose_x_0 = const()[name = string("attention_11_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_11_scores_0 = matmul(transpose_x = attention_11_scores_0_transpose_x_0, transpose_y = attention_11_scores_0_transpose_y_0, x = attention_11_key_cache_head_0, y = attention_11_q_splits_0)[name = string("attention_11_scores_0")]; + fp16 attention_11_scaled_scores_0_y_0 = const()[name = string("attention_11_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_11_scaled_scores_0 = mul(x = attention_11_scores_0, y = attention_11_scaled_scores_0_y_0)[name = string("attention_11_scaled_scores_0")]; + tensor attention_11_masked_scaled_scores_0 = add(x = attention_11_scaled_scores_0, y = transpose_0)[name = string("attention_11_masked_scaled_scores_0")]; + int32 softmax_22_axis_0 = const()[name = string("softmax_22_axis_0"), val = int32(-2)]; + tensor softmax_22 = softmax(axis = softmax_22_axis_0, x = attention_11_masked_scaled_scores_0)[name = string("softmax_22")]; + bool attention_11_attention_0_transpose_x_0 = const()[name = string("attention_11_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_11_attention_0_transpose_y_0 = const()[name = string("attention_11_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_11_attention_0 = matmul(transpose_x = attention_11_attention_0_transpose_x_0, transpose_y = attention_11_attention_0_transpose_y_0, x = softmax_22, y = attention_11_slice_value_cache_heads_0)[name = string("attention_11_attention_0")]; + bool attention_11_scores_1_transpose_y_0 = const()[name = string("attention_11_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_11_scores_1_transpose_x_0 = const()[name = string("attention_11_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_11_scores_1 = matmul(transpose_x = attention_11_scores_1_transpose_x_0, transpose_y = attention_11_scores_1_transpose_y_0, x = attention_11_key_cache_head_1, y = attention_11_q_splits_1)[name = string("attention_11_scores_1")]; + fp16 attention_11_scaled_scores_1_y_0 = const()[name = string("attention_11_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_11_scaled_scores_1 = mul(x = attention_11_scores_1, y = attention_11_scaled_scores_1_y_0)[name = string("attention_11_scaled_scores_1")]; + tensor attention_11_masked_scaled_scores_1 = add(x = attention_11_scaled_scores_1, y = transpose_0)[name = string("attention_11_masked_scaled_scores_1")]; + int32 softmax_23_axis_0 = const()[name = string("softmax_23_axis_0"), val = int32(-2)]; + tensor softmax_23 = softmax(axis = softmax_23_axis_0, x = attention_11_masked_scaled_scores_1)[name = string("softmax_23")]; + bool attention_11_attention_1_transpose_x_0 = const()[name = string("attention_11_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_11_attention_1_transpose_y_0 = const()[name = string("attention_11_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_11_attention_1 = matmul(transpose_x = attention_11_attention_1_transpose_x_0, transpose_y = attention_11_attention_1_transpose_y_0, x = softmax_23, y = attention_11_slice_value_cache_heads_1)[name = string("attention_11_attention_1")]; + int32 attention_11_concat_attention_all_heads_axis_0 = const()[name = string("attention_11_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_11_concat_attention_all_heads_interleave_0 = const()[name = string("attention_11_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_11_concat_attention_all_heads = concat(axis = attention_11_concat_attention_all_heads_axis_0, interleave = attention_11_concat_attention_all_heads_interleave_0, values = (attention_11_attention_0, attention_11_attention_1))[name = string("attention_11_concat_attention_all_heads")]; + tensor attention_11_channels_first_retransposed_perm_0 = const()[name = string("attention_11_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_11_reshaped_shape_0 = const()[name = string("attention_11_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_11_channels_first_retransposed = transpose(perm = attention_11_channels_first_retransposed_perm_0, x = attention_11_concat_attention_all_heads)[name = string("transpose_25")]; + tensor attention_11_reshaped = reshape(shape = attention_11_reshaped_shape_0, x = attention_11_channels_first_retransposed)[name = string("attention_11_reshaped")]; + tensor attention_11_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419767680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420369856))))[name = string("attention_11_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_51 = constexpr_blockwise_shift_scale(data = attention_11_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420398592))))[name = string("constexpr_blockwise_shift_scale_51")]; + tensor attention_11_outproj_strides_0 = const()[name = string("attention_11_outproj_strides_0"), val = tensor([1])]; + string attention_11_outproj_pad_type_0 = const()[name = string("attention_11_outproj_pad_type_0"), val = string("valid")]; + tensor attention_11_outproj_pad_0 = const()[name = string("attention_11_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_11_outproj_dilations_0 = const()[name = string("attention_11_outproj_dilations_0"), val = tensor([1])]; + int32 attention_11_outproj_groups_0 = const()[name = string("attention_11_outproj_groups_0"), val = int32(1)]; + tensor attention_11_outproj = conv(dilations = attention_11_outproj_dilations_0, groups = attention_11_outproj_groups_0, pad = attention_11_outproj_pad_0, pad_type = attention_11_outproj_pad_type_0, strides = attention_11_outproj_strides_0, weight = constexpr_blockwise_shift_scale_51, x = attention_11_reshaped)[name = string("attention_11_outproj")]; + tensor block_11_residual_1 = add(x = block_10_residual_2, y = attention_11_outproj)[name = string("block_11_residual_1")]; + tensor block_11_ffn_rmsnorm_abs = abs(x = block_11_residual_1)[name = string("block_11_ffn_rmsnorm_abs")]; + tensor block_11_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_11_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_11_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_11_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_11_ffn_rmsnorm_maxval = reduce_max(axes = block_11_ffn_rmsnorm_maxval_axes_0, keep_dims = block_11_ffn_rmsnorm_maxval_keep_dims_0, x = block_11_ffn_rmsnorm_abs)[name = string("block_11_ffn_rmsnorm_maxval")]; + fp16 block_11_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_11_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_11_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_11_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_11_ffn_rmsnorm_maxval_clipped = clip(alpha = block_11_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_11_ffn_rmsnorm_maxval_clipped_beta_0, x = block_11_ffn_rmsnorm_maxval)[name = string("block_11_ffn_rmsnorm_maxval_clipped")]; + tensor block_11_ffn_rmsnorm_scaled = real_div(x = block_11_residual_1, y = block_11_ffn_rmsnorm_maxval_clipped)[name = string("block_11_ffn_rmsnorm_scaled")]; + tensor block_11_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_11_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_11_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_11_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_11_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_11_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_11_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_11_ffn_rmsnorm_scaled)[name = string("block_11_ffn_rmsnorm_squared_sum")]; + fp16 block_11_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_11_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_11_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_11_ffn_rmsnorm_rsqrt_epsilon_0, x = block_11_ffn_rmsnorm_squared_sum)[name = string("block_11_ffn_rmsnorm_rsqrt")]; + fp16 block_11_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_11_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_11_ffn_rmsnorm_dim_scaled = mul(x = block_11_ffn_rmsnorm_scaled, y = block_11_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_11_ffn_rmsnorm_dim_scaled")]; + tensor block_11_ffn_rmsnorm_normalized = mul(x = block_11_ffn_rmsnorm_dim_scaled, y = block_11_ffn_rmsnorm_rsqrt)[name = string("block_11_ffn_rmsnorm_normalized")]; + tensor block_11_ffn_rmsnorm_y_0 = const()[name = string("block_11_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420400448)))]; + tensor block_11_ffn_rmsnorm = mul(x = block_11_ffn_rmsnorm_normalized, y = block_11_ffn_rmsnorm_y_0)[name = string("block_11_ffn_rmsnorm")]; + tensor block_11_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420402304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423670976))))[name = string("block_11_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_52 = constexpr_blockwise_shift_scale(data = block_11_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423826688))))[name = string("constexpr_blockwise_shift_scale_52")]; + tensor block_11_ffn_inproj_strides_0 = const()[name = string("block_11_ffn_inproj_strides_0"), val = tensor([1])]; + string block_11_ffn_inproj_pad_type_0 = const()[name = string("block_11_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_11_ffn_inproj_pad_0 = const()[name = string("block_11_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_11_ffn_inproj_dilations_0 = const()[name = string("block_11_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_11_ffn_inproj_groups_0 = const()[name = string("block_11_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_11_ffn_inproj = conv(dilations = block_11_ffn_inproj_dilations_0, groups = block_11_ffn_inproj_groups_0, pad = block_11_ffn_inproj_pad_0, pad_type = block_11_ffn_inproj_pad_type_0, strides = block_11_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_52, x = block_11_ffn_rmsnorm)[name = string("block_11_ffn_inproj")]; + tensor block_11_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423836480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427105152))))[name = string("block_11_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_53 = constexpr_blockwise_shift_scale(data = block_11_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427260864))))[name = string("constexpr_blockwise_shift_scale_53")]; + tensor block_11_ffn_g_strides_0 = const()[name = string("block_11_ffn_g_strides_0"), val = tensor([1])]; + string block_11_ffn_g_pad_type_0 = const()[name = string("block_11_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_11_ffn_g_pad_0 = const()[name = string("block_11_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_11_ffn_g_dilations_0 = const()[name = string("block_11_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_11_ffn_g_groups_0 = const()[name = string("block_11_ffn_g_groups_0"), val = int32(1)]; + tensor block_11_ffn_g = conv(dilations = block_11_ffn_g_dilations_0, groups = block_11_ffn_g_groups_0, pad = block_11_ffn_g_pad_0, pad_type = block_11_ffn_g_pad_type_0, strides = block_11_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_53, x = block_11_ffn_rmsnorm)[name = string("block_11_ffn_g")]; + tensor block_11_ffn_g_activation = silu(x = block_11_ffn_g)[name = string("block_11_ffn_g_activation")]; + tensor block_11_ffn_x_gated = mul(x = block_11_ffn_inproj, y = block_11_ffn_g_activation)[name = string("block_11_ffn_x_gated")]; + tensor block_11_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427270656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430539328))))[name = string("block_11_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_54 = constexpr_blockwise_shift_scale(data = block_11_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430568064))))[name = string("constexpr_blockwise_shift_scale_54")]; + tensor block_11_ffn_outproj_strides_0 = const()[name = string("block_11_ffn_outproj_strides_0"), val = tensor([1])]; + string block_11_ffn_outproj_pad_type_0 = const()[name = string("block_11_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_11_ffn_outproj_pad_0 = const()[name = string("block_11_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_11_ffn_outproj_dilations_0 = const()[name = string("block_11_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_11_ffn_outproj_groups_0 = const()[name = string("block_11_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_11_ffn_outproj = conv(dilations = block_11_ffn_outproj_dilations_0, groups = block_11_ffn_outproj_groups_0, pad = block_11_ffn_outproj_pad_0, pad_type = block_11_ffn_outproj_pad_type_0, strides = block_11_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_54, x = block_11_ffn_x_gated)[name = string("block_11_ffn_outproj")]; + tensor block_11_residual_2 = add(x = block_11_ffn_outproj, y = block_11_residual_1)[name = string("block_11_residual_2")]; + tensor block_12_attention_rmsnorm_abs = abs(x = block_11_residual_2)[name = string("block_12_attention_rmsnorm_abs")]; + tensor block_12_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_12_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_12_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_12_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_12_attention_rmsnorm_maxval = reduce_max(axes = block_12_attention_rmsnorm_maxval_axes_0, keep_dims = block_12_attention_rmsnorm_maxval_keep_dims_0, x = block_12_attention_rmsnorm_abs)[name = string("block_12_attention_rmsnorm_maxval")]; + fp16 block_12_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_12_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_12_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_12_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_12_attention_rmsnorm_maxval_clipped = clip(alpha = block_12_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_12_attention_rmsnorm_maxval_clipped_beta_0, x = block_12_attention_rmsnorm_maxval)[name = string("block_12_attention_rmsnorm_maxval_clipped")]; + tensor block_12_attention_rmsnorm_scaled = real_div(x = block_11_residual_2, y = block_12_attention_rmsnorm_maxval_clipped)[name = string("block_12_attention_rmsnorm_scaled")]; + tensor block_12_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_12_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_12_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_12_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_12_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_12_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_12_attention_rmsnorm_squared_sum_keep_dims_0, x = block_12_attention_rmsnorm_scaled)[name = string("block_12_attention_rmsnorm_squared_sum")]; + fp16 block_12_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_12_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_12_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_12_attention_rmsnorm_rsqrt_epsilon_0, x = block_12_attention_rmsnorm_squared_sum)[name = string("block_12_attention_rmsnorm_rsqrt")]; + fp16 block_12_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_12_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_12_attention_rmsnorm_dim_scaled = mul(x = block_12_attention_rmsnorm_scaled, y = block_12_attention_rmsnorm_dim_scaled_y_0)[name = string("block_12_attention_rmsnorm_dim_scaled")]; + tensor block_12_attention_rmsnorm_normalized = mul(x = block_12_attention_rmsnorm_dim_scaled, y = block_12_attention_rmsnorm_rsqrt)[name = string("block_12_attention_rmsnorm_normalized")]; + tensor block_12_attention_rmsnorm_y_0 = const()[name = string("block_12_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430569920)))]; + tensor block_12_attention_rmsnorm = mul(x = block_12_attention_rmsnorm_normalized, y = block_12_attention_rmsnorm_y_0)[name = string("block_12_attention_rmsnorm")]; + tensor attention_12_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430571776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431345984))))[name = string("attention_12_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_55 = constexpr_blockwise_shift_scale(data = attention_12_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431382912))))[name = string("constexpr_blockwise_shift_scale_55")]; + tensor attention_12_qkvproj_bias_0 = const()[name = string("attention_12_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431385280)))]; + tensor attention_12_qkvproj_strides_0 = const()[name = string("attention_12_qkvproj_strides_0"), val = tensor([1])]; + string attention_12_qkvproj_pad_type_0 = const()[name = string("attention_12_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_12_qkvproj_pad_0 = const()[name = string("attention_12_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_12_qkvproj_dilations_0 = const()[name = string("attention_12_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_12_qkvproj_groups_0 = const()[name = string("attention_12_qkvproj_groups_0"), val = int32(1)]; + tensor attention_12_qkvproj = conv(bias = attention_12_qkvproj_bias_0, dilations = attention_12_qkvproj_dilations_0, groups = attention_12_qkvproj_groups_0, pad = attention_12_qkvproj_pad_0, pad_type = attention_12_qkvproj_pad_type_0, strides = attention_12_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_55, x = block_12_attention_rmsnorm)[name = string("attention_12_qkvproj")]; + tensor attention_12_head_reshape_shape_0 = const()[name = string("attention_12_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_12_head_reshape = reshape(shape = attention_12_head_reshape_shape_0, x = attention_12_qkvproj)[name = string("attention_12_head_reshape")]; + tensor attention_12_head_transpose_perm_0 = const()[name = string("attention_12_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_12_split_qkv_heads_axis_0 = const()[name = string("attention_12_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_12_split_qkv_heads_split_sizes_0 = const()[name = string("attention_12_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_12_head_transpose = transpose(perm = attention_12_head_transpose_perm_0, x = attention_12_head_reshape)[name = string("transpose_24")]; + tensor attention_12_split_qkv_heads_0, tensor attention_12_split_qkv_heads_1, tensor attention_12_split_qkv_heads_2 = split(axis = attention_12_split_qkv_heads_axis_0, split_sizes = attention_12_split_qkv_heads_split_sizes_0, x = attention_12_head_transpose)[name = string("attention_12_split_qkv_heads")]; + tensor attention_12_q_rope_lhs_mult = mul(x = attention_12_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_12_q_rope_lhs_mult")]; + int32 attention_12_q_rotate_half_split_num_splits_0 = const()[name = string("attention_12_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_12_q_rotate_half_split_axis_0 = const()[name = string("attention_12_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_12_q_rotate_half_split_0, tensor attention_12_q_rotate_half_split_1 = split(axis = attention_12_q_rotate_half_split_axis_0, num_splits = attention_12_q_rotate_half_split_num_splits_0, x = attention_12_split_qkv_heads_0)[name = string("attention_12_q_rotate_half_split")]; + fp16 attention_12_q_rotate_half_neg_y_0 = const()[name = string("attention_12_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_12_q_rotate_half_neg = mul(x = attention_12_q_rotate_half_split_1, y = attention_12_q_rotate_half_neg_y_0)[name = string("attention_12_q_rotate_half_neg")]; + int32 attention_12_q_rotate_half_concat_axis_0 = const()[name = string("attention_12_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_12_q_rotate_half_concat_interleave_0 = const()[name = string("attention_12_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_12_q_rotate_half_concat = concat(axis = attention_12_q_rotate_half_concat_axis_0, interleave = attention_12_q_rotate_half_concat_interleave_0, values = (attention_12_q_rotate_half_neg, attention_12_q_rotate_half_split_0))[name = string("attention_12_q_rotate_half_concat")]; + tensor attention_12_q_rope_rhs_mult = mul(x = attention_12_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_12_q_rope_rhs_mult")]; + tensor attention_12_q_rope = add(x = attention_12_q_rope_lhs_mult, y = attention_12_q_rope_rhs_mult)[name = string("attention_12_q_rope")]; + tensor attention_12_k_rope_lhs_mult = mul(x = attention_12_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_12_k_rope_lhs_mult")]; + int32 attention_12_k_rotate_half_split_num_splits_0 = const()[name = string("attention_12_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_12_k_rotate_half_split_axis_0 = const()[name = string("attention_12_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_12_k_rotate_half_split_0, tensor attention_12_k_rotate_half_split_1 = split(axis = attention_12_k_rotate_half_split_axis_0, num_splits = attention_12_k_rotate_half_split_num_splits_0, x = attention_12_split_qkv_heads_1)[name = string("attention_12_k_rotate_half_split")]; + fp16 attention_12_k_rotate_half_neg_y_0 = const()[name = string("attention_12_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_12_k_rotate_half_neg = mul(x = attention_12_k_rotate_half_split_1, y = attention_12_k_rotate_half_neg_y_0)[name = string("attention_12_k_rotate_half_neg")]; + int32 attention_12_k_rotate_half_concat_axis_0 = const()[name = string("attention_12_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_12_k_rotate_half_concat_interleave_0 = const()[name = string("attention_12_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_12_k_rotate_half_concat = concat(axis = attention_12_k_rotate_half_concat_axis_0, interleave = attention_12_k_rotate_half_concat_interleave_0, values = (attention_12_k_rotate_half_neg, attention_12_k_rotate_half_split_0))[name = string("attention_12_k_rotate_half_concat")]; + tensor attention_12_k_rope_rhs_mult = mul(x = attention_12_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_12_k_rope_rhs_mult")]; + tensor attention_12_k_rope = add(x = attention_12_k_rope_lhs_mult, y = attention_12_k_rope_rhs_mult)[name = string("attention_12_k_rope")]; + int32 attention_12_q_splits_axis_0 = const()[name = string("attention_12_q_splits_axis_0"), val = int32(1)]; + int32 attention_12_q_splits_num_splits_0 = const()[name = string("attention_12_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_12_q_splits_0, tensor attention_12_q_splits_1 = split(axis = attention_12_q_splits_axis_0, num_splits = attention_12_q_splits_num_splits_0, x = attention_12_q_rope)[name = string("attention_12_q_splits")]; + tensor attention_12_update_begin_0_values0_0 = const()[name = string("attention_12_update_begin_0_values0_0"), val = tensor([12])]; + tensor attention_12_update_begin_0_values1_0 = const()[name = string("attention_12_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_12_update_begin_0_values3_0 = const()[name = string("attention_12_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_12_update_begin_0_axis_0 = const()[name = string("attention_12_update_begin_0_axis_0"), val = int32(0)]; + bool attention_12_update_begin_0_interleave_0 = const()[name = string("attention_12_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_12_update_begin_0 = concat(axis = attention_12_update_begin_0_axis_0, interleave = attention_12_update_begin_0_interleave_0, values = (attention_12_update_begin_0_values0_0, attention_12_update_begin_0_values1_0, query_pos1, attention_12_update_begin_0_values3_0))[name = string("attention_12_update_begin_0")]; + tensor attention_12_update_end_0_values0_0 = const()[name = string("attention_12_update_end_0_values0_0"), val = tensor([13])]; + tensor attention_12_update_end_0_values1_0 = const()[name = string("attention_12_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_12_update_end_0_values3_0 = const()[name = string("attention_12_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_12_update_end_0_axis_0 = const()[name = string("attention_12_update_end_0_axis_0"), val = int32(0)]; + bool attention_12_update_end_0_interleave_0 = const()[name = string("attention_12_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_12_update_end_0 = concat(axis = attention_12_update_end_0_axis_0, interleave = attention_12_update_end_0_interleave_0, values = (attention_12_update_end_0_values0_0, attention_12_update_end_0_values1_0, end_pos_0, attention_12_update_end_0_values3_0))[name = string("attention_12_update_end_0")]; + tensor attention_12_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_12_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_12_updated_key_cache_0 = slice_update(begin = attention_12_update_begin_0, end = attention_12_update_end_0, squeeze_mask = attention_12_updated_key_cache_0_squeeze_mask_0, update = attention_12_k_rope, x = coreml_update_state_22)[name = string("attention_12_updated_key_cache_0")]; + write_state(data = attention_12_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_72_write_state")]; + tensor coreml_update_state_24 = read_state(input = key_cache_state)[name = string("coreml_update_state_72")]; + tensor attention_12_key_cache_begin_0 = const()[name = string("attention_12_key_cache_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor attention_12_key_cache_end_0 = const()[name = string("attention_12_key_cache_end_0"), val = tensor([13, 2, 512, 64])]; + tensor attention_12_key_cache_squeeze_mask_0 = const()[name = string("attention_12_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_12_key_cache = slice_by_index(begin = attention_12_key_cache_begin_0, end = attention_12_key_cache_end_0, squeeze_mask = attention_12_key_cache_squeeze_mask_0, x = coreml_update_state_24)[name = string("attention_12_key_cache")]; + int32 attention_12_key_cache_head_axis_0 = const()[name = string("attention_12_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_12_key_cache_head_num_splits_0 = const()[name = string("attention_12_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_12_key_cache_head_0, tensor attention_12_key_cache_head_1 = split(axis = attention_12_key_cache_head_axis_0, num_splits = attention_12_key_cache_head_num_splits_0, x = attention_12_key_cache)[name = string("attention_12_key_cache_head")]; + tensor attention_12_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_12_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_12_updated_value_cache_0 = slice_update(begin = attention_12_update_begin_0, end = attention_12_update_end_0, squeeze_mask = attention_12_updated_value_cache_0_squeeze_mask_0, update = attention_12_split_qkv_heads_2, x = coreml_update_state_23)[name = string("attention_12_updated_value_cache_0")]; + write_state(data = attention_12_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_73_write_state")]; + tensor coreml_update_state_25 = read_state(input = value_cache_state)[name = string("coreml_update_state_73")]; + tensor attention_12_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_12_slice_current_layer_value_cache_begin_0"), val = tensor([12, 0, 0, 0])]; + tensor attention_12_slice_current_layer_value_cache_end_0 = const()[name = string("attention_12_slice_current_layer_value_cache_end_0"), val = tensor([13, 2, 512, 64])]; + tensor attention_12_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_12_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_12_slice_current_layer_value_cache = slice_by_index(begin = attention_12_slice_current_layer_value_cache_begin_0, end = attention_12_slice_current_layer_value_cache_end_0, squeeze_mask = attention_12_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_25)[name = string("attention_12_slice_current_layer_value_cache")]; + int32 attention_12_slice_value_cache_heads_axis_0 = const()[name = string("attention_12_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_12_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_12_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_12_slice_value_cache_heads_0, tensor attention_12_slice_value_cache_heads_1 = split(axis = attention_12_slice_value_cache_heads_axis_0, num_splits = attention_12_slice_value_cache_heads_num_splits_0, x = attention_12_slice_current_layer_value_cache)[name = string("attention_12_slice_value_cache_heads")]; + bool attention_12_scores_0_transpose_y_0 = const()[name = string("attention_12_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_12_scores_0_transpose_x_0 = const()[name = string("attention_12_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_12_scores_0 = matmul(transpose_x = attention_12_scores_0_transpose_x_0, transpose_y = attention_12_scores_0_transpose_y_0, x = attention_12_key_cache_head_0, y = attention_12_q_splits_0)[name = string("attention_12_scores_0")]; + fp16 attention_12_scaled_scores_0_y_0 = const()[name = string("attention_12_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_12_scaled_scores_0 = mul(x = attention_12_scores_0, y = attention_12_scaled_scores_0_y_0)[name = string("attention_12_scaled_scores_0")]; + tensor attention_12_masked_scaled_scores_0 = add(x = attention_12_scaled_scores_0, y = transpose_0)[name = string("attention_12_masked_scaled_scores_0")]; + int32 softmax_24_axis_0 = const()[name = string("softmax_24_axis_0"), val = int32(-2)]; + tensor softmax_24 = softmax(axis = softmax_24_axis_0, x = attention_12_masked_scaled_scores_0)[name = string("softmax_24")]; + bool attention_12_attention_0_transpose_x_0 = const()[name = string("attention_12_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_12_attention_0_transpose_y_0 = const()[name = string("attention_12_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_12_attention_0 = matmul(transpose_x = attention_12_attention_0_transpose_x_0, transpose_y = attention_12_attention_0_transpose_y_0, x = softmax_24, y = attention_12_slice_value_cache_heads_0)[name = string("attention_12_attention_0")]; + bool attention_12_scores_1_transpose_y_0 = const()[name = string("attention_12_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_12_scores_1_transpose_x_0 = const()[name = string("attention_12_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_12_scores_1 = matmul(transpose_x = attention_12_scores_1_transpose_x_0, transpose_y = attention_12_scores_1_transpose_y_0, x = attention_12_key_cache_head_1, y = attention_12_q_splits_1)[name = string("attention_12_scores_1")]; + fp16 attention_12_scaled_scores_1_y_0 = const()[name = string("attention_12_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_12_scaled_scores_1 = mul(x = attention_12_scores_1, y = attention_12_scaled_scores_1_y_0)[name = string("attention_12_scaled_scores_1")]; + tensor attention_12_masked_scaled_scores_1 = add(x = attention_12_scaled_scores_1, y = transpose_0)[name = string("attention_12_masked_scaled_scores_1")]; + int32 softmax_25_axis_0 = const()[name = string("softmax_25_axis_0"), val = int32(-2)]; + tensor softmax_25 = softmax(axis = softmax_25_axis_0, x = attention_12_masked_scaled_scores_1)[name = string("softmax_25")]; + bool attention_12_attention_1_transpose_x_0 = const()[name = string("attention_12_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_12_attention_1_transpose_y_0 = const()[name = string("attention_12_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_12_attention_1 = matmul(transpose_x = attention_12_attention_1_transpose_x_0, transpose_y = attention_12_attention_1_transpose_y_0, x = softmax_25, y = attention_12_slice_value_cache_heads_1)[name = string("attention_12_attention_1")]; + int32 attention_12_concat_attention_all_heads_axis_0 = const()[name = string("attention_12_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_12_concat_attention_all_heads_interleave_0 = const()[name = string("attention_12_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_12_concat_attention_all_heads = concat(axis = attention_12_concat_attention_all_heads_axis_0, interleave = attention_12_concat_attention_all_heads_interleave_0, values = (attention_12_attention_0, attention_12_attention_1))[name = string("attention_12_concat_attention_all_heads")]; + tensor attention_12_channels_first_retransposed_perm_0 = const()[name = string("attention_12_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_12_reshaped_shape_0 = const()[name = string("attention_12_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_12_channels_first_retransposed = transpose(perm = attention_12_channels_first_retransposed_perm_0, x = attention_12_concat_attention_all_heads)[name = string("transpose_23")]; + tensor attention_12_reshaped = reshape(shape = attention_12_reshaped_shape_0, x = attention_12_channels_first_retransposed)[name = string("attention_12_reshaped")]; + tensor attention_12_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431387648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431989824))))[name = string("attention_12_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_56 = constexpr_blockwise_shift_scale(data = attention_12_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432018560))))[name = string("constexpr_blockwise_shift_scale_56")]; + tensor attention_12_outproj_strides_0 = const()[name = string("attention_12_outproj_strides_0"), val = tensor([1])]; + string attention_12_outproj_pad_type_0 = const()[name = string("attention_12_outproj_pad_type_0"), val = string("valid")]; + tensor attention_12_outproj_pad_0 = const()[name = string("attention_12_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_12_outproj_dilations_0 = const()[name = string("attention_12_outproj_dilations_0"), val = tensor([1])]; + int32 attention_12_outproj_groups_0 = const()[name = string("attention_12_outproj_groups_0"), val = int32(1)]; + tensor attention_12_outproj = conv(dilations = attention_12_outproj_dilations_0, groups = attention_12_outproj_groups_0, pad = attention_12_outproj_pad_0, pad_type = attention_12_outproj_pad_type_0, strides = attention_12_outproj_strides_0, weight = constexpr_blockwise_shift_scale_56, x = attention_12_reshaped)[name = string("attention_12_outproj")]; + tensor block_12_residual_1 = add(x = block_11_residual_2, y = attention_12_outproj)[name = string("block_12_residual_1")]; + tensor block_12_ffn_rmsnorm_abs = abs(x = block_12_residual_1)[name = string("block_12_ffn_rmsnorm_abs")]; + tensor block_12_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_12_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_12_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_12_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_12_ffn_rmsnorm_maxval = reduce_max(axes = block_12_ffn_rmsnorm_maxval_axes_0, keep_dims = block_12_ffn_rmsnorm_maxval_keep_dims_0, x = block_12_ffn_rmsnorm_abs)[name = string("block_12_ffn_rmsnorm_maxval")]; + fp16 block_12_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_12_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_12_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_12_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_12_ffn_rmsnorm_maxval_clipped = clip(alpha = block_12_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_12_ffn_rmsnorm_maxval_clipped_beta_0, x = block_12_ffn_rmsnorm_maxval)[name = string("block_12_ffn_rmsnorm_maxval_clipped")]; + tensor block_12_ffn_rmsnorm_scaled = real_div(x = block_12_residual_1, y = block_12_ffn_rmsnorm_maxval_clipped)[name = string("block_12_ffn_rmsnorm_scaled")]; + tensor block_12_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_12_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_12_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_12_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_12_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_12_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_12_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_12_ffn_rmsnorm_scaled)[name = string("block_12_ffn_rmsnorm_squared_sum")]; + fp16 block_12_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_12_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_12_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_12_ffn_rmsnorm_rsqrt_epsilon_0, x = block_12_ffn_rmsnorm_squared_sum)[name = string("block_12_ffn_rmsnorm_rsqrt")]; + fp16 block_12_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_12_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_12_ffn_rmsnorm_dim_scaled = mul(x = block_12_ffn_rmsnorm_scaled, y = block_12_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_12_ffn_rmsnorm_dim_scaled")]; + tensor block_12_ffn_rmsnorm_normalized = mul(x = block_12_ffn_rmsnorm_dim_scaled, y = block_12_ffn_rmsnorm_rsqrt)[name = string("block_12_ffn_rmsnorm_normalized")]; + tensor block_12_ffn_rmsnorm_y_0 = const()[name = string("block_12_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432020416)))]; + tensor block_12_ffn_rmsnorm = mul(x = block_12_ffn_rmsnorm_normalized, y = block_12_ffn_rmsnorm_y_0)[name = string("block_12_ffn_rmsnorm")]; + tensor block_12_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432022272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(435290944))))[name = string("block_12_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_57 = constexpr_blockwise_shift_scale(data = block_12_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(435446656))))[name = string("constexpr_blockwise_shift_scale_57")]; + tensor block_12_ffn_inproj_strides_0 = const()[name = string("block_12_ffn_inproj_strides_0"), val = tensor([1])]; + string block_12_ffn_inproj_pad_type_0 = const()[name = string("block_12_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_12_ffn_inproj_pad_0 = const()[name = string("block_12_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_12_ffn_inproj_dilations_0 = const()[name = string("block_12_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_12_ffn_inproj_groups_0 = const()[name = string("block_12_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_12_ffn_inproj = conv(dilations = block_12_ffn_inproj_dilations_0, groups = block_12_ffn_inproj_groups_0, pad = block_12_ffn_inproj_pad_0, pad_type = block_12_ffn_inproj_pad_type_0, strides = block_12_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_57, x = block_12_ffn_rmsnorm)[name = string("block_12_ffn_inproj")]; + tensor block_12_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(435456448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438725120))))[name = string("block_12_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_58 = constexpr_blockwise_shift_scale(data = block_12_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438880832))))[name = string("constexpr_blockwise_shift_scale_58")]; + tensor block_12_ffn_g_strides_0 = const()[name = string("block_12_ffn_g_strides_0"), val = tensor([1])]; + string block_12_ffn_g_pad_type_0 = const()[name = string("block_12_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_12_ffn_g_pad_0 = const()[name = string("block_12_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_12_ffn_g_dilations_0 = const()[name = string("block_12_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_12_ffn_g_groups_0 = const()[name = string("block_12_ffn_g_groups_0"), val = int32(1)]; + tensor block_12_ffn_g = conv(dilations = block_12_ffn_g_dilations_0, groups = block_12_ffn_g_groups_0, pad = block_12_ffn_g_pad_0, pad_type = block_12_ffn_g_pad_type_0, strides = block_12_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_58, x = block_12_ffn_rmsnorm)[name = string("block_12_ffn_g")]; + tensor block_12_ffn_g_activation = silu(x = block_12_ffn_g)[name = string("block_12_ffn_g_activation")]; + tensor block_12_ffn_x_gated = mul(x = block_12_ffn_inproj, y = block_12_ffn_g_activation)[name = string("block_12_ffn_x_gated")]; + tensor block_12_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438890624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442159296))))[name = string("block_12_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_59 = constexpr_blockwise_shift_scale(data = block_12_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442188032))))[name = string("constexpr_blockwise_shift_scale_59")]; + tensor block_12_ffn_outproj_strides_0 = const()[name = string("block_12_ffn_outproj_strides_0"), val = tensor([1])]; + string block_12_ffn_outproj_pad_type_0 = const()[name = string("block_12_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_12_ffn_outproj_pad_0 = const()[name = string("block_12_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_12_ffn_outproj_dilations_0 = const()[name = string("block_12_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_12_ffn_outproj_groups_0 = const()[name = string("block_12_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_12_ffn_outproj = conv(dilations = block_12_ffn_outproj_dilations_0, groups = block_12_ffn_outproj_groups_0, pad = block_12_ffn_outproj_pad_0, pad_type = block_12_ffn_outproj_pad_type_0, strides = block_12_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_59, x = block_12_ffn_x_gated)[name = string("block_12_ffn_outproj")]; + tensor block_12_residual_2 = add(x = block_12_ffn_outproj, y = block_12_residual_1)[name = string("block_12_residual_2")]; + tensor block_13_attention_rmsnorm_abs = abs(x = block_12_residual_2)[name = string("block_13_attention_rmsnorm_abs")]; + tensor block_13_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_13_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_13_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_13_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_13_attention_rmsnorm_maxval = reduce_max(axes = block_13_attention_rmsnorm_maxval_axes_0, keep_dims = block_13_attention_rmsnorm_maxval_keep_dims_0, x = block_13_attention_rmsnorm_abs)[name = string("block_13_attention_rmsnorm_maxval")]; + fp16 block_13_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_13_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_13_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_13_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_13_attention_rmsnorm_maxval_clipped = clip(alpha = block_13_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_13_attention_rmsnorm_maxval_clipped_beta_0, x = block_13_attention_rmsnorm_maxval)[name = string("block_13_attention_rmsnorm_maxval_clipped")]; + tensor block_13_attention_rmsnorm_scaled = real_div(x = block_12_residual_2, y = block_13_attention_rmsnorm_maxval_clipped)[name = string("block_13_attention_rmsnorm_scaled")]; + tensor block_13_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_13_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_13_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_13_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_13_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_13_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_13_attention_rmsnorm_squared_sum_keep_dims_0, x = block_13_attention_rmsnorm_scaled)[name = string("block_13_attention_rmsnorm_squared_sum")]; + fp16 block_13_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_13_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_13_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_13_attention_rmsnorm_rsqrt_epsilon_0, x = block_13_attention_rmsnorm_squared_sum)[name = string("block_13_attention_rmsnorm_rsqrt")]; + fp16 block_13_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_13_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_13_attention_rmsnorm_dim_scaled = mul(x = block_13_attention_rmsnorm_scaled, y = block_13_attention_rmsnorm_dim_scaled_y_0)[name = string("block_13_attention_rmsnorm_dim_scaled")]; + tensor block_13_attention_rmsnorm_normalized = mul(x = block_13_attention_rmsnorm_dim_scaled, y = block_13_attention_rmsnorm_rsqrt)[name = string("block_13_attention_rmsnorm_normalized")]; + tensor block_13_attention_rmsnorm_y_0 = const()[name = string("block_13_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442189888)))]; + tensor block_13_attention_rmsnorm = mul(x = block_13_attention_rmsnorm_normalized, y = block_13_attention_rmsnorm_y_0)[name = string("block_13_attention_rmsnorm")]; + tensor attention_13_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442191744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442965952))))[name = string("attention_13_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_60 = constexpr_blockwise_shift_scale(data = attention_13_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443002880))))[name = string("constexpr_blockwise_shift_scale_60")]; + tensor attention_13_qkvproj_bias_0 = const()[name = string("attention_13_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443005248)))]; + tensor attention_13_qkvproj_strides_0 = const()[name = string("attention_13_qkvproj_strides_0"), val = tensor([1])]; + string attention_13_qkvproj_pad_type_0 = const()[name = string("attention_13_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_13_qkvproj_pad_0 = const()[name = string("attention_13_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_13_qkvproj_dilations_0 = const()[name = string("attention_13_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_13_qkvproj_groups_0 = const()[name = string("attention_13_qkvproj_groups_0"), val = int32(1)]; + tensor attention_13_qkvproj = conv(bias = attention_13_qkvproj_bias_0, dilations = attention_13_qkvproj_dilations_0, groups = attention_13_qkvproj_groups_0, pad = attention_13_qkvproj_pad_0, pad_type = attention_13_qkvproj_pad_type_0, strides = attention_13_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_60, x = block_13_attention_rmsnorm)[name = string("attention_13_qkvproj")]; + tensor attention_13_head_reshape_shape_0 = const()[name = string("attention_13_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_13_head_reshape = reshape(shape = attention_13_head_reshape_shape_0, x = attention_13_qkvproj)[name = string("attention_13_head_reshape")]; + tensor attention_13_head_transpose_perm_0 = const()[name = string("attention_13_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_13_split_qkv_heads_axis_0 = const()[name = string("attention_13_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_13_split_qkv_heads_split_sizes_0 = const()[name = string("attention_13_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_13_head_transpose = transpose(perm = attention_13_head_transpose_perm_0, x = attention_13_head_reshape)[name = string("transpose_22")]; + tensor attention_13_split_qkv_heads_0, tensor attention_13_split_qkv_heads_1, tensor attention_13_split_qkv_heads_2 = split(axis = attention_13_split_qkv_heads_axis_0, split_sizes = attention_13_split_qkv_heads_split_sizes_0, x = attention_13_head_transpose)[name = string("attention_13_split_qkv_heads")]; + tensor attention_13_q_rope_lhs_mult = mul(x = attention_13_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_13_q_rope_lhs_mult")]; + int32 attention_13_q_rotate_half_split_num_splits_0 = const()[name = string("attention_13_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_13_q_rotate_half_split_axis_0 = const()[name = string("attention_13_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_13_q_rotate_half_split_0, tensor attention_13_q_rotate_half_split_1 = split(axis = attention_13_q_rotate_half_split_axis_0, num_splits = attention_13_q_rotate_half_split_num_splits_0, x = attention_13_split_qkv_heads_0)[name = string("attention_13_q_rotate_half_split")]; + fp16 attention_13_q_rotate_half_neg_y_0 = const()[name = string("attention_13_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_13_q_rotate_half_neg = mul(x = attention_13_q_rotate_half_split_1, y = attention_13_q_rotate_half_neg_y_0)[name = string("attention_13_q_rotate_half_neg")]; + int32 attention_13_q_rotate_half_concat_axis_0 = const()[name = string("attention_13_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_13_q_rotate_half_concat_interleave_0 = const()[name = string("attention_13_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_13_q_rotate_half_concat = concat(axis = attention_13_q_rotate_half_concat_axis_0, interleave = attention_13_q_rotate_half_concat_interleave_0, values = (attention_13_q_rotate_half_neg, attention_13_q_rotate_half_split_0))[name = string("attention_13_q_rotate_half_concat")]; + tensor attention_13_q_rope_rhs_mult = mul(x = attention_13_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_13_q_rope_rhs_mult")]; + tensor attention_13_q_rope = add(x = attention_13_q_rope_lhs_mult, y = attention_13_q_rope_rhs_mult)[name = string("attention_13_q_rope")]; + tensor attention_13_k_rope_lhs_mult = mul(x = attention_13_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_13_k_rope_lhs_mult")]; + int32 attention_13_k_rotate_half_split_num_splits_0 = const()[name = string("attention_13_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_13_k_rotate_half_split_axis_0 = const()[name = string("attention_13_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_13_k_rotate_half_split_0, tensor attention_13_k_rotate_half_split_1 = split(axis = attention_13_k_rotate_half_split_axis_0, num_splits = attention_13_k_rotate_half_split_num_splits_0, x = attention_13_split_qkv_heads_1)[name = string("attention_13_k_rotate_half_split")]; + fp16 attention_13_k_rotate_half_neg_y_0 = const()[name = string("attention_13_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_13_k_rotate_half_neg = mul(x = attention_13_k_rotate_half_split_1, y = attention_13_k_rotate_half_neg_y_0)[name = string("attention_13_k_rotate_half_neg")]; + int32 attention_13_k_rotate_half_concat_axis_0 = const()[name = string("attention_13_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_13_k_rotate_half_concat_interleave_0 = const()[name = string("attention_13_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_13_k_rotate_half_concat = concat(axis = attention_13_k_rotate_half_concat_axis_0, interleave = attention_13_k_rotate_half_concat_interleave_0, values = (attention_13_k_rotate_half_neg, attention_13_k_rotate_half_split_0))[name = string("attention_13_k_rotate_half_concat")]; + tensor attention_13_k_rope_rhs_mult = mul(x = attention_13_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_13_k_rope_rhs_mult")]; + tensor attention_13_k_rope = add(x = attention_13_k_rope_lhs_mult, y = attention_13_k_rope_rhs_mult)[name = string("attention_13_k_rope")]; + int32 attention_13_q_splits_axis_0 = const()[name = string("attention_13_q_splits_axis_0"), val = int32(1)]; + int32 attention_13_q_splits_num_splits_0 = const()[name = string("attention_13_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_13_q_splits_0, tensor attention_13_q_splits_1 = split(axis = attention_13_q_splits_axis_0, num_splits = attention_13_q_splits_num_splits_0, x = attention_13_q_rope)[name = string("attention_13_q_splits")]; + tensor attention_13_update_begin_0_values0_0 = const()[name = string("attention_13_update_begin_0_values0_0"), val = tensor([13])]; + tensor attention_13_update_begin_0_values1_0 = const()[name = string("attention_13_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_13_update_begin_0_values3_0 = const()[name = string("attention_13_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_13_update_begin_0_axis_0 = const()[name = string("attention_13_update_begin_0_axis_0"), val = int32(0)]; + bool attention_13_update_begin_0_interleave_0 = const()[name = string("attention_13_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_13_update_begin_0 = concat(axis = attention_13_update_begin_0_axis_0, interleave = attention_13_update_begin_0_interleave_0, values = (attention_13_update_begin_0_values0_0, attention_13_update_begin_0_values1_0, query_pos1, attention_13_update_begin_0_values3_0))[name = string("attention_13_update_begin_0")]; + tensor attention_13_update_end_0_values0_0 = const()[name = string("attention_13_update_end_0_values0_0"), val = tensor([14])]; + tensor attention_13_update_end_0_values1_0 = const()[name = string("attention_13_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_13_update_end_0_values3_0 = const()[name = string("attention_13_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_13_update_end_0_axis_0 = const()[name = string("attention_13_update_end_0_axis_0"), val = int32(0)]; + bool attention_13_update_end_0_interleave_0 = const()[name = string("attention_13_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_13_update_end_0 = concat(axis = attention_13_update_end_0_axis_0, interleave = attention_13_update_end_0_interleave_0, values = (attention_13_update_end_0_values0_0, attention_13_update_end_0_values1_0, end_pos_0, attention_13_update_end_0_values3_0))[name = string("attention_13_update_end_0")]; + tensor attention_13_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_13_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_13_updated_key_cache_0 = slice_update(begin = attention_13_update_begin_0, end = attention_13_update_end_0, squeeze_mask = attention_13_updated_key_cache_0_squeeze_mask_0, update = attention_13_k_rope, x = coreml_update_state_24)[name = string("attention_13_updated_key_cache_0")]; + write_state(data = attention_13_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_74_write_state")]; + tensor coreml_update_state_26 = read_state(input = key_cache_state)[name = string("coreml_update_state_74")]; + tensor attention_13_key_cache_begin_0 = const()[name = string("attention_13_key_cache_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor attention_13_key_cache_end_0 = const()[name = string("attention_13_key_cache_end_0"), val = tensor([14, 2, 512, 64])]; + tensor attention_13_key_cache_squeeze_mask_0 = const()[name = string("attention_13_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_13_key_cache = slice_by_index(begin = attention_13_key_cache_begin_0, end = attention_13_key_cache_end_0, squeeze_mask = attention_13_key_cache_squeeze_mask_0, x = coreml_update_state_26)[name = string("attention_13_key_cache")]; + int32 attention_13_key_cache_head_axis_0 = const()[name = string("attention_13_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_13_key_cache_head_num_splits_0 = const()[name = string("attention_13_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_13_key_cache_head_0, tensor attention_13_key_cache_head_1 = split(axis = attention_13_key_cache_head_axis_0, num_splits = attention_13_key_cache_head_num_splits_0, x = attention_13_key_cache)[name = string("attention_13_key_cache_head")]; + tensor attention_13_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_13_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_13_updated_value_cache_0 = slice_update(begin = attention_13_update_begin_0, end = attention_13_update_end_0, squeeze_mask = attention_13_updated_value_cache_0_squeeze_mask_0, update = attention_13_split_qkv_heads_2, x = coreml_update_state_25)[name = string("attention_13_updated_value_cache_0")]; + write_state(data = attention_13_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_75_write_state")]; + tensor coreml_update_state_27 = read_state(input = value_cache_state)[name = string("coreml_update_state_75")]; + tensor attention_13_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_13_slice_current_layer_value_cache_begin_0"), val = tensor([13, 0, 0, 0])]; + tensor attention_13_slice_current_layer_value_cache_end_0 = const()[name = string("attention_13_slice_current_layer_value_cache_end_0"), val = tensor([14, 2, 512, 64])]; + tensor attention_13_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_13_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_13_slice_current_layer_value_cache = slice_by_index(begin = attention_13_slice_current_layer_value_cache_begin_0, end = attention_13_slice_current_layer_value_cache_end_0, squeeze_mask = attention_13_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_27)[name = string("attention_13_slice_current_layer_value_cache")]; + int32 attention_13_slice_value_cache_heads_axis_0 = const()[name = string("attention_13_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_13_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_13_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_13_slice_value_cache_heads_0, tensor attention_13_slice_value_cache_heads_1 = split(axis = attention_13_slice_value_cache_heads_axis_0, num_splits = attention_13_slice_value_cache_heads_num_splits_0, x = attention_13_slice_current_layer_value_cache)[name = string("attention_13_slice_value_cache_heads")]; + bool attention_13_scores_0_transpose_y_0 = const()[name = string("attention_13_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_13_scores_0_transpose_x_0 = const()[name = string("attention_13_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_13_scores_0 = matmul(transpose_x = attention_13_scores_0_transpose_x_0, transpose_y = attention_13_scores_0_transpose_y_0, x = attention_13_key_cache_head_0, y = attention_13_q_splits_0)[name = string("attention_13_scores_0")]; + fp16 attention_13_scaled_scores_0_y_0 = const()[name = string("attention_13_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_13_scaled_scores_0 = mul(x = attention_13_scores_0, y = attention_13_scaled_scores_0_y_0)[name = string("attention_13_scaled_scores_0")]; + tensor attention_13_masked_scaled_scores_0 = add(x = attention_13_scaled_scores_0, y = transpose_0)[name = string("attention_13_masked_scaled_scores_0")]; + int32 softmax_26_axis_0 = const()[name = string("softmax_26_axis_0"), val = int32(-2)]; + tensor softmax_26 = softmax(axis = softmax_26_axis_0, x = attention_13_masked_scaled_scores_0)[name = string("softmax_26")]; + bool attention_13_attention_0_transpose_x_0 = const()[name = string("attention_13_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_13_attention_0_transpose_y_0 = const()[name = string("attention_13_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_13_attention_0 = matmul(transpose_x = attention_13_attention_0_transpose_x_0, transpose_y = attention_13_attention_0_transpose_y_0, x = softmax_26, y = attention_13_slice_value_cache_heads_0)[name = string("attention_13_attention_0")]; + bool attention_13_scores_1_transpose_y_0 = const()[name = string("attention_13_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_13_scores_1_transpose_x_0 = const()[name = string("attention_13_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_13_scores_1 = matmul(transpose_x = attention_13_scores_1_transpose_x_0, transpose_y = attention_13_scores_1_transpose_y_0, x = attention_13_key_cache_head_1, y = attention_13_q_splits_1)[name = string("attention_13_scores_1")]; + fp16 attention_13_scaled_scores_1_y_0 = const()[name = string("attention_13_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_13_scaled_scores_1 = mul(x = attention_13_scores_1, y = attention_13_scaled_scores_1_y_0)[name = string("attention_13_scaled_scores_1")]; + tensor attention_13_masked_scaled_scores_1 = add(x = attention_13_scaled_scores_1, y = transpose_0)[name = string("attention_13_masked_scaled_scores_1")]; + int32 softmax_27_axis_0 = const()[name = string("softmax_27_axis_0"), val = int32(-2)]; + tensor softmax_27 = softmax(axis = softmax_27_axis_0, x = attention_13_masked_scaled_scores_1)[name = string("softmax_27")]; + bool attention_13_attention_1_transpose_x_0 = const()[name = string("attention_13_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_13_attention_1_transpose_y_0 = const()[name = string("attention_13_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_13_attention_1 = matmul(transpose_x = attention_13_attention_1_transpose_x_0, transpose_y = attention_13_attention_1_transpose_y_0, x = softmax_27, y = attention_13_slice_value_cache_heads_1)[name = string("attention_13_attention_1")]; + int32 attention_13_concat_attention_all_heads_axis_0 = const()[name = string("attention_13_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_13_concat_attention_all_heads_interleave_0 = const()[name = string("attention_13_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_13_concat_attention_all_heads = concat(axis = attention_13_concat_attention_all_heads_axis_0, interleave = attention_13_concat_attention_all_heads_interleave_0, values = (attention_13_attention_0, attention_13_attention_1))[name = string("attention_13_concat_attention_all_heads")]; + tensor attention_13_channels_first_retransposed_perm_0 = const()[name = string("attention_13_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_13_reshaped_shape_0 = const()[name = string("attention_13_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_13_channels_first_retransposed = transpose(perm = attention_13_channels_first_retransposed_perm_0, x = attention_13_concat_attention_all_heads)[name = string("transpose_21")]; + tensor attention_13_reshaped = reshape(shape = attention_13_reshaped_shape_0, x = attention_13_channels_first_retransposed)[name = string("attention_13_reshaped")]; + tensor attention_13_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443007616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443609792))))[name = string("attention_13_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_61 = constexpr_blockwise_shift_scale(data = attention_13_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443638528))))[name = string("constexpr_blockwise_shift_scale_61")]; + tensor attention_13_outproj_strides_0 = const()[name = string("attention_13_outproj_strides_0"), val = tensor([1])]; + string attention_13_outproj_pad_type_0 = const()[name = string("attention_13_outproj_pad_type_0"), val = string("valid")]; + tensor attention_13_outproj_pad_0 = const()[name = string("attention_13_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_13_outproj_dilations_0 = const()[name = string("attention_13_outproj_dilations_0"), val = tensor([1])]; + int32 attention_13_outproj_groups_0 = const()[name = string("attention_13_outproj_groups_0"), val = int32(1)]; + tensor attention_13_outproj = conv(dilations = attention_13_outproj_dilations_0, groups = attention_13_outproj_groups_0, pad = attention_13_outproj_pad_0, pad_type = attention_13_outproj_pad_type_0, strides = attention_13_outproj_strides_0, weight = constexpr_blockwise_shift_scale_61, x = attention_13_reshaped)[name = string("attention_13_outproj")]; + tensor block_13_residual_1 = add(x = block_12_residual_2, y = attention_13_outproj)[name = string("block_13_residual_1")]; + tensor block_13_ffn_rmsnorm_abs = abs(x = block_13_residual_1)[name = string("block_13_ffn_rmsnorm_abs")]; + tensor block_13_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_13_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_13_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_13_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_13_ffn_rmsnorm_maxval = reduce_max(axes = block_13_ffn_rmsnorm_maxval_axes_0, keep_dims = block_13_ffn_rmsnorm_maxval_keep_dims_0, x = block_13_ffn_rmsnorm_abs)[name = string("block_13_ffn_rmsnorm_maxval")]; + fp16 block_13_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_13_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_13_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_13_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_13_ffn_rmsnorm_maxval_clipped = clip(alpha = block_13_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_13_ffn_rmsnorm_maxval_clipped_beta_0, x = block_13_ffn_rmsnorm_maxval)[name = string("block_13_ffn_rmsnorm_maxval_clipped")]; + tensor block_13_ffn_rmsnorm_scaled = real_div(x = block_13_residual_1, y = block_13_ffn_rmsnorm_maxval_clipped)[name = string("block_13_ffn_rmsnorm_scaled")]; + tensor block_13_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_13_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_13_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_13_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_13_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_13_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_13_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_13_ffn_rmsnorm_scaled)[name = string("block_13_ffn_rmsnorm_squared_sum")]; + fp16 block_13_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_13_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_13_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_13_ffn_rmsnorm_rsqrt_epsilon_0, x = block_13_ffn_rmsnorm_squared_sum)[name = string("block_13_ffn_rmsnorm_rsqrt")]; + fp16 block_13_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_13_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_13_ffn_rmsnorm_dim_scaled = mul(x = block_13_ffn_rmsnorm_scaled, y = block_13_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_13_ffn_rmsnorm_dim_scaled")]; + tensor block_13_ffn_rmsnorm_normalized = mul(x = block_13_ffn_rmsnorm_dim_scaled, y = block_13_ffn_rmsnorm_rsqrt)[name = string("block_13_ffn_rmsnorm_normalized")]; + tensor block_13_ffn_rmsnorm_y_0 = const()[name = string("block_13_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443640384)))]; + tensor block_13_ffn_rmsnorm = mul(x = block_13_ffn_rmsnorm_normalized, y = block_13_ffn_rmsnorm_y_0)[name = string("block_13_ffn_rmsnorm")]; + tensor block_13_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443642240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446910912))))[name = string("block_13_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_62 = constexpr_blockwise_shift_scale(data = block_13_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447066624))))[name = string("constexpr_blockwise_shift_scale_62")]; + tensor block_13_ffn_inproj_strides_0 = const()[name = string("block_13_ffn_inproj_strides_0"), val = tensor([1])]; + string block_13_ffn_inproj_pad_type_0 = const()[name = string("block_13_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_13_ffn_inproj_pad_0 = const()[name = string("block_13_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_13_ffn_inproj_dilations_0 = const()[name = string("block_13_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_13_ffn_inproj_groups_0 = const()[name = string("block_13_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_13_ffn_inproj = conv(dilations = block_13_ffn_inproj_dilations_0, groups = block_13_ffn_inproj_groups_0, pad = block_13_ffn_inproj_pad_0, pad_type = block_13_ffn_inproj_pad_type_0, strides = block_13_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_62, x = block_13_ffn_rmsnorm)[name = string("block_13_ffn_inproj")]; + tensor block_13_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447076416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(450345088))))[name = string("block_13_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_63 = constexpr_blockwise_shift_scale(data = block_13_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(450500800))))[name = string("constexpr_blockwise_shift_scale_63")]; + tensor block_13_ffn_g_strides_0 = const()[name = string("block_13_ffn_g_strides_0"), val = tensor([1])]; + string block_13_ffn_g_pad_type_0 = const()[name = string("block_13_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_13_ffn_g_pad_0 = const()[name = string("block_13_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_13_ffn_g_dilations_0 = const()[name = string("block_13_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_13_ffn_g_groups_0 = const()[name = string("block_13_ffn_g_groups_0"), val = int32(1)]; + tensor block_13_ffn_g = conv(dilations = block_13_ffn_g_dilations_0, groups = block_13_ffn_g_groups_0, pad = block_13_ffn_g_pad_0, pad_type = block_13_ffn_g_pad_type_0, strides = block_13_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_63, x = block_13_ffn_rmsnorm)[name = string("block_13_ffn_g")]; + tensor block_13_ffn_g_activation = silu(x = block_13_ffn_g)[name = string("block_13_ffn_g_activation")]; + tensor block_13_ffn_x_gated = mul(x = block_13_ffn_inproj, y = block_13_ffn_g_activation)[name = string("block_13_ffn_x_gated")]; + tensor block_13_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(450510592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453779264))))[name = string("block_13_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_64 = constexpr_blockwise_shift_scale(data = block_13_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453808000))))[name = string("constexpr_blockwise_shift_scale_64")]; + tensor block_13_ffn_outproj_strides_0 = const()[name = string("block_13_ffn_outproj_strides_0"), val = tensor([1])]; + string block_13_ffn_outproj_pad_type_0 = const()[name = string("block_13_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_13_ffn_outproj_pad_0 = const()[name = string("block_13_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_13_ffn_outproj_dilations_0 = const()[name = string("block_13_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_13_ffn_outproj_groups_0 = const()[name = string("block_13_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_13_ffn_outproj = conv(dilations = block_13_ffn_outproj_dilations_0, groups = block_13_ffn_outproj_groups_0, pad = block_13_ffn_outproj_pad_0, pad_type = block_13_ffn_outproj_pad_type_0, strides = block_13_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_64, x = block_13_ffn_x_gated)[name = string("block_13_ffn_outproj")]; + tensor block_13_residual_2 = add(x = block_13_ffn_outproj, y = block_13_residual_1)[name = string("block_13_residual_2")]; + tensor block_14_attention_rmsnorm_abs = abs(x = block_13_residual_2)[name = string("block_14_attention_rmsnorm_abs")]; + tensor block_14_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_14_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_14_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_14_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_14_attention_rmsnorm_maxval = reduce_max(axes = block_14_attention_rmsnorm_maxval_axes_0, keep_dims = block_14_attention_rmsnorm_maxval_keep_dims_0, x = block_14_attention_rmsnorm_abs)[name = string("block_14_attention_rmsnorm_maxval")]; + fp16 block_14_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_14_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_14_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_14_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_14_attention_rmsnorm_maxval_clipped = clip(alpha = block_14_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_14_attention_rmsnorm_maxval_clipped_beta_0, x = block_14_attention_rmsnorm_maxval)[name = string("block_14_attention_rmsnorm_maxval_clipped")]; + tensor block_14_attention_rmsnorm_scaled = real_div(x = block_13_residual_2, y = block_14_attention_rmsnorm_maxval_clipped)[name = string("block_14_attention_rmsnorm_scaled")]; + tensor block_14_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_14_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_14_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_14_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_14_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_14_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_14_attention_rmsnorm_squared_sum_keep_dims_0, x = block_14_attention_rmsnorm_scaled)[name = string("block_14_attention_rmsnorm_squared_sum")]; + fp16 block_14_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_14_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_14_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_14_attention_rmsnorm_rsqrt_epsilon_0, x = block_14_attention_rmsnorm_squared_sum)[name = string("block_14_attention_rmsnorm_rsqrt")]; + fp16 block_14_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_14_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_14_attention_rmsnorm_dim_scaled = mul(x = block_14_attention_rmsnorm_scaled, y = block_14_attention_rmsnorm_dim_scaled_y_0)[name = string("block_14_attention_rmsnorm_dim_scaled")]; + tensor block_14_attention_rmsnorm_normalized = mul(x = block_14_attention_rmsnorm_dim_scaled, y = block_14_attention_rmsnorm_rsqrt)[name = string("block_14_attention_rmsnorm_normalized")]; + tensor block_14_attention_rmsnorm_y_0 = const()[name = string("block_14_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453809856)))]; + tensor block_14_attention_rmsnorm = mul(x = block_14_attention_rmsnorm_normalized, y = block_14_attention_rmsnorm_y_0)[name = string("block_14_attention_rmsnorm")]; + tensor attention_14_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453811712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454585920))))[name = string("attention_14_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_65 = constexpr_blockwise_shift_scale(data = attention_14_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454622848))))[name = string("constexpr_blockwise_shift_scale_65")]; + tensor attention_14_qkvproj_bias_0 = const()[name = string("attention_14_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454625216)))]; + tensor attention_14_qkvproj_strides_0 = const()[name = string("attention_14_qkvproj_strides_0"), val = tensor([1])]; + string attention_14_qkvproj_pad_type_0 = const()[name = string("attention_14_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_14_qkvproj_pad_0 = const()[name = string("attention_14_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_14_qkvproj_dilations_0 = const()[name = string("attention_14_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_14_qkvproj_groups_0 = const()[name = string("attention_14_qkvproj_groups_0"), val = int32(1)]; + tensor attention_14_qkvproj = conv(bias = attention_14_qkvproj_bias_0, dilations = attention_14_qkvproj_dilations_0, groups = attention_14_qkvproj_groups_0, pad = attention_14_qkvproj_pad_0, pad_type = attention_14_qkvproj_pad_type_0, strides = attention_14_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_65, x = block_14_attention_rmsnorm)[name = string("attention_14_qkvproj")]; + tensor attention_14_head_reshape_shape_0 = const()[name = string("attention_14_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_14_head_reshape = reshape(shape = attention_14_head_reshape_shape_0, x = attention_14_qkvproj)[name = string("attention_14_head_reshape")]; + tensor attention_14_head_transpose_perm_0 = const()[name = string("attention_14_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_14_split_qkv_heads_axis_0 = const()[name = string("attention_14_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_14_split_qkv_heads_split_sizes_0 = const()[name = string("attention_14_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_14_head_transpose = transpose(perm = attention_14_head_transpose_perm_0, x = attention_14_head_reshape)[name = string("transpose_20")]; + tensor attention_14_split_qkv_heads_0, tensor attention_14_split_qkv_heads_1, tensor attention_14_split_qkv_heads_2 = split(axis = attention_14_split_qkv_heads_axis_0, split_sizes = attention_14_split_qkv_heads_split_sizes_0, x = attention_14_head_transpose)[name = string("attention_14_split_qkv_heads")]; + tensor attention_14_q_rope_lhs_mult = mul(x = attention_14_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_14_q_rope_lhs_mult")]; + int32 attention_14_q_rotate_half_split_num_splits_0 = const()[name = string("attention_14_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_14_q_rotate_half_split_axis_0 = const()[name = string("attention_14_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_14_q_rotate_half_split_0, tensor attention_14_q_rotate_half_split_1 = split(axis = attention_14_q_rotate_half_split_axis_0, num_splits = attention_14_q_rotate_half_split_num_splits_0, x = attention_14_split_qkv_heads_0)[name = string("attention_14_q_rotate_half_split")]; + fp16 attention_14_q_rotate_half_neg_y_0 = const()[name = string("attention_14_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_14_q_rotate_half_neg = mul(x = attention_14_q_rotate_half_split_1, y = attention_14_q_rotate_half_neg_y_0)[name = string("attention_14_q_rotate_half_neg")]; + int32 attention_14_q_rotate_half_concat_axis_0 = const()[name = string("attention_14_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_14_q_rotate_half_concat_interleave_0 = const()[name = string("attention_14_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_14_q_rotate_half_concat = concat(axis = attention_14_q_rotate_half_concat_axis_0, interleave = attention_14_q_rotate_half_concat_interleave_0, values = (attention_14_q_rotate_half_neg, attention_14_q_rotate_half_split_0))[name = string("attention_14_q_rotate_half_concat")]; + tensor attention_14_q_rope_rhs_mult = mul(x = attention_14_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_14_q_rope_rhs_mult")]; + tensor attention_14_q_rope = add(x = attention_14_q_rope_lhs_mult, y = attention_14_q_rope_rhs_mult)[name = string("attention_14_q_rope")]; + tensor attention_14_k_rope_lhs_mult = mul(x = attention_14_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_14_k_rope_lhs_mult")]; + int32 attention_14_k_rotate_half_split_num_splits_0 = const()[name = string("attention_14_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_14_k_rotate_half_split_axis_0 = const()[name = string("attention_14_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_14_k_rotate_half_split_0, tensor attention_14_k_rotate_half_split_1 = split(axis = attention_14_k_rotate_half_split_axis_0, num_splits = attention_14_k_rotate_half_split_num_splits_0, x = attention_14_split_qkv_heads_1)[name = string("attention_14_k_rotate_half_split")]; + fp16 attention_14_k_rotate_half_neg_y_0 = const()[name = string("attention_14_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_14_k_rotate_half_neg = mul(x = attention_14_k_rotate_half_split_1, y = attention_14_k_rotate_half_neg_y_0)[name = string("attention_14_k_rotate_half_neg")]; + int32 attention_14_k_rotate_half_concat_axis_0 = const()[name = string("attention_14_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_14_k_rotate_half_concat_interleave_0 = const()[name = string("attention_14_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_14_k_rotate_half_concat = concat(axis = attention_14_k_rotate_half_concat_axis_0, interleave = attention_14_k_rotate_half_concat_interleave_0, values = (attention_14_k_rotate_half_neg, attention_14_k_rotate_half_split_0))[name = string("attention_14_k_rotate_half_concat")]; + tensor attention_14_k_rope_rhs_mult = mul(x = attention_14_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_14_k_rope_rhs_mult")]; + tensor attention_14_k_rope = add(x = attention_14_k_rope_lhs_mult, y = attention_14_k_rope_rhs_mult)[name = string("attention_14_k_rope")]; + int32 attention_14_q_splits_axis_0 = const()[name = string("attention_14_q_splits_axis_0"), val = int32(1)]; + int32 attention_14_q_splits_num_splits_0 = const()[name = string("attention_14_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_14_q_splits_0, tensor attention_14_q_splits_1 = split(axis = attention_14_q_splits_axis_0, num_splits = attention_14_q_splits_num_splits_0, x = attention_14_q_rope)[name = string("attention_14_q_splits")]; + tensor attention_14_update_begin_0_values0_0 = const()[name = string("attention_14_update_begin_0_values0_0"), val = tensor([14])]; + tensor attention_14_update_begin_0_values1_0 = const()[name = string("attention_14_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_14_update_begin_0_values3_0 = const()[name = string("attention_14_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_14_update_begin_0_axis_0 = const()[name = string("attention_14_update_begin_0_axis_0"), val = int32(0)]; + bool attention_14_update_begin_0_interleave_0 = const()[name = string("attention_14_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_14_update_begin_0 = concat(axis = attention_14_update_begin_0_axis_0, interleave = attention_14_update_begin_0_interleave_0, values = (attention_14_update_begin_0_values0_0, attention_14_update_begin_0_values1_0, query_pos1, attention_14_update_begin_0_values3_0))[name = string("attention_14_update_begin_0")]; + tensor attention_14_update_end_0_values0_0 = const()[name = string("attention_14_update_end_0_values0_0"), val = tensor([15])]; + tensor attention_14_update_end_0_values1_0 = const()[name = string("attention_14_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_14_update_end_0_values3_0 = const()[name = string("attention_14_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_14_update_end_0_axis_0 = const()[name = string("attention_14_update_end_0_axis_0"), val = int32(0)]; + bool attention_14_update_end_0_interleave_0 = const()[name = string("attention_14_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_14_update_end_0 = concat(axis = attention_14_update_end_0_axis_0, interleave = attention_14_update_end_0_interleave_0, values = (attention_14_update_end_0_values0_0, attention_14_update_end_0_values1_0, end_pos_0, attention_14_update_end_0_values3_0))[name = string("attention_14_update_end_0")]; + tensor attention_14_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_14_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_14_updated_key_cache_0 = slice_update(begin = attention_14_update_begin_0, end = attention_14_update_end_0, squeeze_mask = attention_14_updated_key_cache_0_squeeze_mask_0, update = attention_14_k_rope, x = coreml_update_state_26)[name = string("attention_14_updated_key_cache_0")]; + write_state(data = attention_14_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_76_write_state")]; + tensor coreml_update_state_28 = read_state(input = key_cache_state)[name = string("coreml_update_state_76")]; + tensor attention_14_key_cache_begin_0 = const()[name = string("attention_14_key_cache_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor attention_14_key_cache_end_0 = const()[name = string("attention_14_key_cache_end_0"), val = tensor([15, 2, 512, 64])]; + tensor attention_14_key_cache_squeeze_mask_0 = const()[name = string("attention_14_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_14_key_cache = slice_by_index(begin = attention_14_key_cache_begin_0, end = attention_14_key_cache_end_0, squeeze_mask = attention_14_key_cache_squeeze_mask_0, x = coreml_update_state_28)[name = string("attention_14_key_cache")]; + int32 attention_14_key_cache_head_axis_0 = const()[name = string("attention_14_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_14_key_cache_head_num_splits_0 = const()[name = string("attention_14_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_14_key_cache_head_0, tensor attention_14_key_cache_head_1 = split(axis = attention_14_key_cache_head_axis_0, num_splits = attention_14_key_cache_head_num_splits_0, x = attention_14_key_cache)[name = string("attention_14_key_cache_head")]; + tensor attention_14_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_14_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_14_updated_value_cache_0 = slice_update(begin = attention_14_update_begin_0, end = attention_14_update_end_0, squeeze_mask = attention_14_updated_value_cache_0_squeeze_mask_0, update = attention_14_split_qkv_heads_2, x = coreml_update_state_27)[name = string("attention_14_updated_value_cache_0")]; + write_state(data = attention_14_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_77_write_state")]; + tensor coreml_update_state_29 = read_state(input = value_cache_state)[name = string("coreml_update_state_77")]; + tensor attention_14_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_14_slice_current_layer_value_cache_begin_0"), val = tensor([14, 0, 0, 0])]; + tensor attention_14_slice_current_layer_value_cache_end_0 = const()[name = string("attention_14_slice_current_layer_value_cache_end_0"), val = tensor([15, 2, 512, 64])]; + tensor attention_14_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_14_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_14_slice_current_layer_value_cache = slice_by_index(begin = attention_14_slice_current_layer_value_cache_begin_0, end = attention_14_slice_current_layer_value_cache_end_0, squeeze_mask = attention_14_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_29)[name = string("attention_14_slice_current_layer_value_cache")]; + int32 attention_14_slice_value_cache_heads_axis_0 = const()[name = string("attention_14_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_14_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_14_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_14_slice_value_cache_heads_0, tensor attention_14_slice_value_cache_heads_1 = split(axis = attention_14_slice_value_cache_heads_axis_0, num_splits = attention_14_slice_value_cache_heads_num_splits_0, x = attention_14_slice_current_layer_value_cache)[name = string("attention_14_slice_value_cache_heads")]; + bool attention_14_scores_0_transpose_y_0 = const()[name = string("attention_14_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_14_scores_0_transpose_x_0 = const()[name = string("attention_14_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_14_scores_0 = matmul(transpose_x = attention_14_scores_0_transpose_x_0, transpose_y = attention_14_scores_0_transpose_y_0, x = attention_14_key_cache_head_0, y = attention_14_q_splits_0)[name = string("attention_14_scores_0")]; + fp16 attention_14_scaled_scores_0_y_0 = const()[name = string("attention_14_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_14_scaled_scores_0 = mul(x = attention_14_scores_0, y = attention_14_scaled_scores_0_y_0)[name = string("attention_14_scaled_scores_0")]; + tensor attention_14_masked_scaled_scores_0 = add(x = attention_14_scaled_scores_0, y = transpose_0)[name = string("attention_14_masked_scaled_scores_0")]; + int32 softmax_28_axis_0 = const()[name = string("softmax_28_axis_0"), val = int32(-2)]; + tensor softmax_28 = softmax(axis = softmax_28_axis_0, x = attention_14_masked_scaled_scores_0)[name = string("softmax_28")]; + bool attention_14_attention_0_transpose_x_0 = const()[name = string("attention_14_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_14_attention_0_transpose_y_0 = const()[name = string("attention_14_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_14_attention_0 = matmul(transpose_x = attention_14_attention_0_transpose_x_0, transpose_y = attention_14_attention_0_transpose_y_0, x = softmax_28, y = attention_14_slice_value_cache_heads_0)[name = string("attention_14_attention_0")]; + bool attention_14_scores_1_transpose_y_0 = const()[name = string("attention_14_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_14_scores_1_transpose_x_0 = const()[name = string("attention_14_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_14_scores_1 = matmul(transpose_x = attention_14_scores_1_transpose_x_0, transpose_y = attention_14_scores_1_transpose_y_0, x = attention_14_key_cache_head_1, y = attention_14_q_splits_1)[name = string("attention_14_scores_1")]; + fp16 attention_14_scaled_scores_1_y_0 = const()[name = string("attention_14_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_14_scaled_scores_1 = mul(x = attention_14_scores_1, y = attention_14_scaled_scores_1_y_0)[name = string("attention_14_scaled_scores_1")]; + tensor attention_14_masked_scaled_scores_1 = add(x = attention_14_scaled_scores_1, y = transpose_0)[name = string("attention_14_masked_scaled_scores_1")]; + int32 softmax_29_axis_0 = const()[name = string("softmax_29_axis_0"), val = int32(-2)]; + tensor softmax_29 = softmax(axis = softmax_29_axis_0, x = attention_14_masked_scaled_scores_1)[name = string("softmax_29")]; + bool attention_14_attention_1_transpose_x_0 = const()[name = string("attention_14_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_14_attention_1_transpose_y_0 = const()[name = string("attention_14_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_14_attention_1 = matmul(transpose_x = attention_14_attention_1_transpose_x_0, transpose_y = attention_14_attention_1_transpose_y_0, x = softmax_29, y = attention_14_slice_value_cache_heads_1)[name = string("attention_14_attention_1")]; + int32 attention_14_concat_attention_all_heads_axis_0 = const()[name = string("attention_14_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_14_concat_attention_all_heads_interleave_0 = const()[name = string("attention_14_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_14_concat_attention_all_heads = concat(axis = attention_14_concat_attention_all_heads_axis_0, interleave = attention_14_concat_attention_all_heads_interleave_0, values = (attention_14_attention_0, attention_14_attention_1))[name = string("attention_14_concat_attention_all_heads")]; + tensor attention_14_channels_first_retransposed_perm_0 = const()[name = string("attention_14_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_14_reshaped_shape_0 = const()[name = string("attention_14_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_14_channels_first_retransposed = transpose(perm = attention_14_channels_first_retransposed_perm_0, x = attention_14_concat_attention_all_heads)[name = string("transpose_19")]; + tensor attention_14_reshaped = reshape(shape = attention_14_reshaped_shape_0, x = attention_14_channels_first_retransposed)[name = string("attention_14_reshaped")]; + tensor attention_14_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454627584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455229760))))[name = string("attention_14_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_66 = constexpr_blockwise_shift_scale(data = attention_14_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455258496))))[name = string("constexpr_blockwise_shift_scale_66")]; + tensor attention_14_outproj_strides_0 = const()[name = string("attention_14_outproj_strides_0"), val = tensor([1])]; + string attention_14_outproj_pad_type_0 = const()[name = string("attention_14_outproj_pad_type_0"), val = string("valid")]; + tensor attention_14_outproj_pad_0 = const()[name = string("attention_14_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_14_outproj_dilations_0 = const()[name = string("attention_14_outproj_dilations_0"), val = tensor([1])]; + int32 attention_14_outproj_groups_0 = const()[name = string("attention_14_outproj_groups_0"), val = int32(1)]; + tensor attention_14_outproj = conv(dilations = attention_14_outproj_dilations_0, groups = attention_14_outproj_groups_0, pad = attention_14_outproj_pad_0, pad_type = attention_14_outproj_pad_type_0, strides = attention_14_outproj_strides_0, weight = constexpr_blockwise_shift_scale_66, x = attention_14_reshaped)[name = string("attention_14_outproj")]; + tensor block_14_residual_1 = add(x = block_13_residual_2, y = attention_14_outproj)[name = string("block_14_residual_1")]; + tensor block_14_ffn_rmsnorm_abs = abs(x = block_14_residual_1)[name = string("block_14_ffn_rmsnorm_abs")]; + tensor block_14_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_14_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_14_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_14_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_14_ffn_rmsnorm_maxval = reduce_max(axes = block_14_ffn_rmsnorm_maxval_axes_0, keep_dims = block_14_ffn_rmsnorm_maxval_keep_dims_0, x = block_14_ffn_rmsnorm_abs)[name = string("block_14_ffn_rmsnorm_maxval")]; + fp16 block_14_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_14_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_14_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_14_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_14_ffn_rmsnorm_maxval_clipped = clip(alpha = block_14_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_14_ffn_rmsnorm_maxval_clipped_beta_0, x = block_14_ffn_rmsnorm_maxval)[name = string("block_14_ffn_rmsnorm_maxval_clipped")]; + tensor block_14_ffn_rmsnorm_scaled = real_div(x = block_14_residual_1, y = block_14_ffn_rmsnorm_maxval_clipped)[name = string("block_14_ffn_rmsnorm_scaled")]; + tensor block_14_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_14_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_14_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_14_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_14_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_14_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_14_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_14_ffn_rmsnorm_scaled)[name = string("block_14_ffn_rmsnorm_squared_sum")]; + fp16 block_14_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_14_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_14_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_14_ffn_rmsnorm_rsqrt_epsilon_0, x = block_14_ffn_rmsnorm_squared_sum)[name = string("block_14_ffn_rmsnorm_rsqrt")]; + fp16 block_14_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_14_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_14_ffn_rmsnorm_dim_scaled = mul(x = block_14_ffn_rmsnorm_scaled, y = block_14_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_14_ffn_rmsnorm_dim_scaled")]; + tensor block_14_ffn_rmsnorm_normalized = mul(x = block_14_ffn_rmsnorm_dim_scaled, y = block_14_ffn_rmsnorm_rsqrt)[name = string("block_14_ffn_rmsnorm_normalized")]; + tensor block_14_ffn_rmsnorm_y_0 = const()[name = string("block_14_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455260352)))]; + tensor block_14_ffn_rmsnorm = mul(x = block_14_ffn_rmsnorm_normalized, y = block_14_ffn_rmsnorm_y_0)[name = string("block_14_ffn_rmsnorm")]; + tensor block_14_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455262208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458530880))))[name = string("block_14_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_67 = constexpr_blockwise_shift_scale(data = block_14_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458686592))))[name = string("constexpr_blockwise_shift_scale_67")]; + tensor block_14_ffn_inproj_strides_0 = const()[name = string("block_14_ffn_inproj_strides_0"), val = tensor([1])]; + string block_14_ffn_inproj_pad_type_0 = const()[name = string("block_14_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_14_ffn_inproj_pad_0 = const()[name = string("block_14_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_14_ffn_inproj_dilations_0 = const()[name = string("block_14_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_14_ffn_inproj_groups_0 = const()[name = string("block_14_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_14_ffn_inproj = conv(dilations = block_14_ffn_inproj_dilations_0, groups = block_14_ffn_inproj_groups_0, pad = block_14_ffn_inproj_pad_0, pad_type = block_14_ffn_inproj_pad_type_0, strides = block_14_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_67, x = block_14_ffn_rmsnorm)[name = string("block_14_ffn_inproj")]; + tensor block_14_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458696384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461965056))))[name = string("block_14_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_68 = constexpr_blockwise_shift_scale(data = block_14_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462120768))))[name = string("constexpr_blockwise_shift_scale_68")]; + tensor block_14_ffn_g_strides_0 = const()[name = string("block_14_ffn_g_strides_0"), val = tensor([1])]; + string block_14_ffn_g_pad_type_0 = const()[name = string("block_14_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_14_ffn_g_pad_0 = const()[name = string("block_14_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_14_ffn_g_dilations_0 = const()[name = string("block_14_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_14_ffn_g_groups_0 = const()[name = string("block_14_ffn_g_groups_0"), val = int32(1)]; + tensor block_14_ffn_g = conv(dilations = block_14_ffn_g_dilations_0, groups = block_14_ffn_g_groups_0, pad = block_14_ffn_g_pad_0, pad_type = block_14_ffn_g_pad_type_0, strides = block_14_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_68, x = block_14_ffn_rmsnorm)[name = string("block_14_ffn_g")]; + tensor block_14_ffn_g_activation = silu(x = block_14_ffn_g)[name = string("block_14_ffn_g_activation")]; + tensor block_14_ffn_x_gated = mul(x = block_14_ffn_inproj, y = block_14_ffn_g_activation)[name = string("block_14_ffn_x_gated")]; + tensor block_14_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462130560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465399232))))[name = string("block_14_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_69 = constexpr_blockwise_shift_scale(data = block_14_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465427968))))[name = string("constexpr_blockwise_shift_scale_69")]; + tensor block_14_ffn_outproj_strides_0 = const()[name = string("block_14_ffn_outproj_strides_0"), val = tensor([1])]; + string block_14_ffn_outproj_pad_type_0 = const()[name = string("block_14_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_14_ffn_outproj_pad_0 = const()[name = string("block_14_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_14_ffn_outproj_dilations_0 = const()[name = string("block_14_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_14_ffn_outproj_groups_0 = const()[name = string("block_14_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_14_ffn_outproj = conv(dilations = block_14_ffn_outproj_dilations_0, groups = block_14_ffn_outproj_groups_0, pad = block_14_ffn_outproj_pad_0, pad_type = block_14_ffn_outproj_pad_type_0, strides = block_14_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_69, x = block_14_ffn_x_gated)[name = string("block_14_ffn_outproj")]; + tensor block_14_residual_2 = add(x = block_14_ffn_outproj, y = block_14_residual_1)[name = string("block_14_residual_2")]; + tensor block_15_attention_rmsnorm_abs = abs(x = block_14_residual_2)[name = string("block_15_attention_rmsnorm_abs")]; + tensor block_15_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_15_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_15_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_15_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_15_attention_rmsnorm_maxval = reduce_max(axes = block_15_attention_rmsnorm_maxval_axes_0, keep_dims = block_15_attention_rmsnorm_maxval_keep_dims_0, x = block_15_attention_rmsnorm_abs)[name = string("block_15_attention_rmsnorm_maxval")]; + fp16 block_15_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_15_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_15_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_15_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_15_attention_rmsnorm_maxval_clipped = clip(alpha = block_15_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_15_attention_rmsnorm_maxval_clipped_beta_0, x = block_15_attention_rmsnorm_maxval)[name = string("block_15_attention_rmsnorm_maxval_clipped")]; + tensor block_15_attention_rmsnorm_scaled = real_div(x = block_14_residual_2, y = block_15_attention_rmsnorm_maxval_clipped)[name = string("block_15_attention_rmsnorm_scaled")]; + tensor block_15_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_15_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_15_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_15_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_15_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_15_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_15_attention_rmsnorm_squared_sum_keep_dims_0, x = block_15_attention_rmsnorm_scaled)[name = string("block_15_attention_rmsnorm_squared_sum")]; + fp16 block_15_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_15_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_15_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_15_attention_rmsnorm_rsqrt_epsilon_0, x = block_15_attention_rmsnorm_squared_sum)[name = string("block_15_attention_rmsnorm_rsqrt")]; + fp16 block_15_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_15_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_15_attention_rmsnorm_dim_scaled = mul(x = block_15_attention_rmsnorm_scaled, y = block_15_attention_rmsnorm_dim_scaled_y_0)[name = string("block_15_attention_rmsnorm_dim_scaled")]; + tensor block_15_attention_rmsnorm_normalized = mul(x = block_15_attention_rmsnorm_dim_scaled, y = block_15_attention_rmsnorm_rsqrt)[name = string("block_15_attention_rmsnorm_normalized")]; + tensor block_15_attention_rmsnorm_y_0 = const()[name = string("block_15_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465429824)))]; + tensor block_15_attention_rmsnorm = mul(x = block_15_attention_rmsnorm_normalized, y = block_15_attention_rmsnorm_y_0)[name = string("block_15_attention_rmsnorm")]; + tensor attention_15_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465431680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466205888))))[name = string("attention_15_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_70 = constexpr_blockwise_shift_scale(data = attention_15_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466242816))))[name = string("constexpr_blockwise_shift_scale_70")]; + tensor attention_15_qkvproj_bias_0 = const()[name = string("attention_15_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466245184)))]; + tensor attention_15_qkvproj_strides_0 = const()[name = string("attention_15_qkvproj_strides_0"), val = tensor([1])]; + string attention_15_qkvproj_pad_type_0 = const()[name = string("attention_15_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_15_qkvproj_pad_0 = const()[name = string("attention_15_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_15_qkvproj_dilations_0 = const()[name = string("attention_15_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_15_qkvproj_groups_0 = const()[name = string("attention_15_qkvproj_groups_0"), val = int32(1)]; + tensor attention_15_qkvproj = conv(bias = attention_15_qkvproj_bias_0, dilations = attention_15_qkvproj_dilations_0, groups = attention_15_qkvproj_groups_0, pad = attention_15_qkvproj_pad_0, pad_type = attention_15_qkvproj_pad_type_0, strides = attention_15_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_70, x = block_15_attention_rmsnorm)[name = string("attention_15_qkvproj")]; + tensor attention_15_head_reshape_shape_0 = const()[name = string("attention_15_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_15_head_reshape = reshape(shape = attention_15_head_reshape_shape_0, x = attention_15_qkvproj)[name = string("attention_15_head_reshape")]; + tensor attention_15_head_transpose_perm_0 = const()[name = string("attention_15_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_15_split_qkv_heads_axis_0 = const()[name = string("attention_15_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_15_split_qkv_heads_split_sizes_0 = const()[name = string("attention_15_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_15_head_transpose = transpose(perm = attention_15_head_transpose_perm_0, x = attention_15_head_reshape)[name = string("transpose_18")]; + tensor attention_15_split_qkv_heads_0, tensor attention_15_split_qkv_heads_1, tensor attention_15_split_qkv_heads_2 = split(axis = attention_15_split_qkv_heads_axis_0, split_sizes = attention_15_split_qkv_heads_split_sizes_0, x = attention_15_head_transpose)[name = string("attention_15_split_qkv_heads")]; + tensor attention_15_q_rope_lhs_mult = mul(x = attention_15_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_15_q_rope_lhs_mult")]; + int32 attention_15_q_rotate_half_split_num_splits_0 = const()[name = string("attention_15_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_15_q_rotate_half_split_axis_0 = const()[name = string("attention_15_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_15_q_rotate_half_split_0, tensor attention_15_q_rotate_half_split_1 = split(axis = attention_15_q_rotate_half_split_axis_0, num_splits = attention_15_q_rotate_half_split_num_splits_0, x = attention_15_split_qkv_heads_0)[name = string("attention_15_q_rotate_half_split")]; + fp16 attention_15_q_rotate_half_neg_y_0 = const()[name = string("attention_15_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_15_q_rotate_half_neg = mul(x = attention_15_q_rotate_half_split_1, y = attention_15_q_rotate_half_neg_y_0)[name = string("attention_15_q_rotate_half_neg")]; + int32 attention_15_q_rotate_half_concat_axis_0 = const()[name = string("attention_15_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_15_q_rotate_half_concat_interleave_0 = const()[name = string("attention_15_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_15_q_rotate_half_concat = concat(axis = attention_15_q_rotate_half_concat_axis_0, interleave = attention_15_q_rotate_half_concat_interleave_0, values = (attention_15_q_rotate_half_neg, attention_15_q_rotate_half_split_0))[name = string("attention_15_q_rotate_half_concat")]; + tensor attention_15_q_rope_rhs_mult = mul(x = attention_15_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_15_q_rope_rhs_mult")]; + tensor attention_15_q_rope = add(x = attention_15_q_rope_lhs_mult, y = attention_15_q_rope_rhs_mult)[name = string("attention_15_q_rope")]; + tensor attention_15_k_rope_lhs_mult = mul(x = attention_15_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_15_k_rope_lhs_mult")]; + int32 attention_15_k_rotate_half_split_num_splits_0 = const()[name = string("attention_15_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_15_k_rotate_half_split_axis_0 = const()[name = string("attention_15_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_15_k_rotate_half_split_0, tensor attention_15_k_rotate_half_split_1 = split(axis = attention_15_k_rotate_half_split_axis_0, num_splits = attention_15_k_rotate_half_split_num_splits_0, x = attention_15_split_qkv_heads_1)[name = string("attention_15_k_rotate_half_split")]; + fp16 attention_15_k_rotate_half_neg_y_0 = const()[name = string("attention_15_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_15_k_rotate_half_neg = mul(x = attention_15_k_rotate_half_split_1, y = attention_15_k_rotate_half_neg_y_0)[name = string("attention_15_k_rotate_half_neg")]; + int32 attention_15_k_rotate_half_concat_axis_0 = const()[name = string("attention_15_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_15_k_rotate_half_concat_interleave_0 = const()[name = string("attention_15_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_15_k_rotate_half_concat = concat(axis = attention_15_k_rotate_half_concat_axis_0, interleave = attention_15_k_rotate_half_concat_interleave_0, values = (attention_15_k_rotate_half_neg, attention_15_k_rotate_half_split_0))[name = string("attention_15_k_rotate_half_concat")]; + tensor attention_15_k_rope_rhs_mult = mul(x = attention_15_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_15_k_rope_rhs_mult")]; + tensor attention_15_k_rope = add(x = attention_15_k_rope_lhs_mult, y = attention_15_k_rope_rhs_mult)[name = string("attention_15_k_rope")]; + int32 attention_15_q_splits_axis_0 = const()[name = string("attention_15_q_splits_axis_0"), val = int32(1)]; + int32 attention_15_q_splits_num_splits_0 = const()[name = string("attention_15_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_15_q_splits_0, tensor attention_15_q_splits_1 = split(axis = attention_15_q_splits_axis_0, num_splits = attention_15_q_splits_num_splits_0, x = attention_15_q_rope)[name = string("attention_15_q_splits")]; + tensor attention_15_update_begin_0_values0_0 = const()[name = string("attention_15_update_begin_0_values0_0"), val = tensor([15])]; + tensor attention_15_update_begin_0_values1_0 = const()[name = string("attention_15_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_15_update_begin_0_values3_0 = const()[name = string("attention_15_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_15_update_begin_0_axis_0 = const()[name = string("attention_15_update_begin_0_axis_0"), val = int32(0)]; + bool attention_15_update_begin_0_interleave_0 = const()[name = string("attention_15_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_15_update_begin_0 = concat(axis = attention_15_update_begin_0_axis_0, interleave = attention_15_update_begin_0_interleave_0, values = (attention_15_update_begin_0_values0_0, attention_15_update_begin_0_values1_0, query_pos1, attention_15_update_begin_0_values3_0))[name = string("attention_15_update_begin_0")]; + tensor attention_15_update_end_0_values0_0 = const()[name = string("attention_15_update_end_0_values0_0"), val = tensor([16])]; + tensor attention_15_update_end_0_values1_0 = const()[name = string("attention_15_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_15_update_end_0_values3_0 = const()[name = string("attention_15_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_15_update_end_0_axis_0 = const()[name = string("attention_15_update_end_0_axis_0"), val = int32(0)]; + bool attention_15_update_end_0_interleave_0 = const()[name = string("attention_15_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_15_update_end_0 = concat(axis = attention_15_update_end_0_axis_0, interleave = attention_15_update_end_0_interleave_0, values = (attention_15_update_end_0_values0_0, attention_15_update_end_0_values1_0, end_pos_0, attention_15_update_end_0_values3_0))[name = string("attention_15_update_end_0")]; + tensor attention_15_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_15_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_15_updated_key_cache_0 = slice_update(begin = attention_15_update_begin_0, end = attention_15_update_end_0, squeeze_mask = attention_15_updated_key_cache_0_squeeze_mask_0, update = attention_15_k_rope, x = coreml_update_state_28)[name = string("attention_15_updated_key_cache_0")]; + write_state(data = attention_15_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_78_write_state")]; + tensor coreml_update_state_30 = read_state(input = key_cache_state)[name = string("coreml_update_state_78")]; + tensor attention_15_key_cache_begin_0 = const()[name = string("attention_15_key_cache_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor attention_15_key_cache_end_0 = const()[name = string("attention_15_key_cache_end_0"), val = tensor([16, 2, 512, 64])]; + tensor attention_15_key_cache_squeeze_mask_0 = const()[name = string("attention_15_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_15_key_cache = slice_by_index(begin = attention_15_key_cache_begin_0, end = attention_15_key_cache_end_0, squeeze_mask = attention_15_key_cache_squeeze_mask_0, x = coreml_update_state_30)[name = string("attention_15_key_cache")]; + int32 attention_15_key_cache_head_axis_0 = const()[name = string("attention_15_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_15_key_cache_head_num_splits_0 = const()[name = string("attention_15_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_15_key_cache_head_0, tensor attention_15_key_cache_head_1 = split(axis = attention_15_key_cache_head_axis_0, num_splits = attention_15_key_cache_head_num_splits_0, x = attention_15_key_cache)[name = string("attention_15_key_cache_head")]; + tensor attention_15_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_15_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_15_updated_value_cache_0 = slice_update(begin = attention_15_update_begin_0, end = attention_15_update_end_0, squeeze_mask = attention_15_updated_value_cache_0_squeeze_mask_0, update = attention_15_split_qkv_heads_2, x = coreml_update_state_29)[name = string("attention_15_updated_value_cache_0")]; + write_state(data = attention_15_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_79_write_state")]; + tensor coreml_update_state_31 = read_state(input = value_cache_state)[name = string("coreml_update_state_79")]; + tensor attention_15_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_15_slice_current_layer_value_cache_begin_0"), val = tensor([15, 0, 0, 0])]; + tensor attention_15_slice_current_layer_value_cache_end_0 = const()[name = string("attention_15_slice_current_layer_value_cache_end_0"), val = tensor([16, 2, 512, 64])]; + tensor attention_15_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_15_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_15_slice_current_layer_value_cache = slice_by_index(begin = attention_15_slice_current_layer_value_cache_begin_0, end = attention_15_slice_current_layer_value_cache_end_0, squeeze_mask = attention_15_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_31)[name = string("attention_15_slice_current_layer_value_cache")]; + int32 attention_15_slice_value_cache_heads_axis_0 = const()[name = string("attention_15_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_15_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_15_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_15_slice_value_cache_heads_0, tensor attention_15_slice_value_cache_heads_1 = split(axis = attention_15_slice_value_cache_heads_axis_0, num_splits = attention_15_slice_value_cache_heads_num_splits_0, x = attention_15_slice_current_layer_value_cache)[name = string("attention_15_slice_value_cache_heads")]; + bool attention_15_scores_0_transpose_y_0 = const()[name = string("attention_15_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_15_scores_0_transpose_x_0 = const()[name = string("attention_15_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_15_scores_0 = matmul(transpose_x = attention_15_scores_0_transpose_x_0, transpose_y = attention_15_scores_0_transpose_y_0, x = attention_15_key_cache_head_0, y = attention_15_q_splits_0)[name = string("attention_15_scores_0")]; + fp16 attention_15_scaled_scores_0_y_0 = const()[name = string("attention_15_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_15_scaled_scores_0 = mul(x = attention_15_scores_0, y = attention_15_scaled_scores_0_y_0)[name = string("attention_15_scaled_scores_0")]; + tensor attention_15_masked_scaled_scores_0 = add(x = attention_15_scaled_scores_0, y = transpose_0)[name = string("attention_15_masked_scaled_scores_0")]; + int32 softmax_30_axis_0 = const()[name = string("softmax_30_axis_0"), val = int32(-2)]; + tensor softmax_30 = softmax(axis = softmax_30_axis_0, x = attention_15_masked_scaled_scores_0)[name = string("softmax_30")]; + bool attention_15_attention_0_transpose_x_0 = const()[name = string("attention_15_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_15_attention_0_transpose_y_0 = const()[name = string("attention_15_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_15_attention_0 = matmul(transpose_x = attention_15_attention_0_transpose_x_0, transpose_y = attention_15_attention_0_transpose_y_0, x = softmax_30, y = attention_15_slice_value_cache_heads_0)[name = string("attention_15_attention_0")]; + bool attention_15_scores_1_transpose_y_0 = const()[name = string("attention_15_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_15_scores_1_transpose_x_0 = const()[name = string("attention_15_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_15_scores_1 = matmul(transpose_x = attention_15_scores_1_transpose_x_0, transpose_y = attention_15_scores_1_transpose_y_0, x = attention_15_key_cache_head_1, y = attention_15_q_splits_1)[name = string("attention_15_scores_1")]; + fp16 attention_15_scaled_scores_1_y_0 = const()[name = string("attention_15_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_15_scaled_scores_1 = mul(x = attention_15_scores_1, y = attention_15_scaled_scores_1_y_0)[name = string("attention_15_scaled_scores_1")]; + tensor attention_15_masked_scaled_scores_1 = add(x = attention_15_scaled_scores_1, y = transpose_0)[name = string("attention_15_masked_scaled_scores_1")]; + int32 softmax_31_axis_0 = const()[name = string("softmax_31_axis_0"), val = int32(-2)]; + tensor softmax_31 = softmax(axis = softmax_31_axis_0, x = attention_15_masked_scaled_scores_1)[name = string("softmax_31")]; + bool attention_15_attention_1_transpose_x_0 = const()[name = string("attention_15_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_15_attention_1_transpose_y_0 = const()[name = string("attention_15_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_15_attention_1 = matmul(transpose_x = attention_15_attention_1_transpose_x_0, transpose_y = attention_15_attention_1_transpose_y_0, x = softmax_31, y = attention_15_slice_value_cache_heads_1)[name = string("attention_15_attention_1")]; + int32 attention_15_concat_attention_all_heads_axis_0 = const()[name = string("attention_15_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_15_concat_attention_all_heads_interleave_0 = const()[name = string("attention_15_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_15_concat_attention_all_heads = concat(axis = attention_15_concat_attention_all_heads_axis_0, interleave = attention_15_concat_attention_all_heads_interleave_0, values = (attention_15_attention_0, attention_15_attention_1))[name = string("attention_15_concat_attention_all_heads")]; + tensor attention_15_channels_first_retransposed_perm_0 = const()[name = string("attention_15_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_15_reshaped_shape_0 = const()[name = string("attention_15_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_15_channels_first_retransposed = transpose(perm = attention_15_channels_first_retransposed_perm_0, x = attention_15_concat_attention_all_heads)[name = string("transpose_17")]; + tensor attention_15_reshaped = reshape(shape = attention_15_reshaped_shape_0, x = attention_15_channels_first_retransposed)[name = string("attention_15_reshaped")]; + tensor attention_15_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466247552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466849728))))[name = string("attention_15_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_71 = constexpr_blockwise_shift_scale(data = attention_15_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466878464))))[name = string("constexpr_blockwise_shift_scale_71")]; + tensor attention_15_outproj_strides_0 = const()[name = string("attention_15_outproj_strides_0"), val = tensor([1])]; + string attention_15_outproj_pad_type_0 = const()[name = string("attention_15_outproj_pad_type_0"), val = string("valid")]; + tensor attention_15_outproj_pad_0 = const()[name = string("attention_15_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_15_outproj_dilations_0 = const()[name = string("attention_15_outproj_dilations_0"), val = tensor([1])]; + int32 attention_15_outproj_groups_0 = const()[name = string("attention_15_outproj_groups_0"), val = int32(1)]; + tensor attention_15_outproj = conv(dilations = attention_15_outproj_dilations_0, groups = attention_15_outproj_groups_0, pad = attention_15_outproj_pad_0, pad_type = attention_15_outproj_pad_type_0, strides = attention_15_outproj_strides_0, weight = constexpr_blockwise_shift_scale_71, x = attention_15_reshaped)[name = string("attention_15_outproj")]; + tensor block_15_residual_1 = add(x = block_14_residual_2, y = attention_15_outproj)[name = string("block_15_residual_1")]; + tensor block_15_ffn_rmsnorm_abs = abs(x = block_15_residual_1)[name = string("block_15_ffn_rmsnorm_abs")]; + tensor block_15_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_15_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_15_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_15_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_15_ffn_rmsnorm_maxval = reduce_max(axes = block_15_ffn_rmsnorm_maxval_axes_0, keep_dims = block_15_ffn_rmsnorm_maxval_keep_dims_0, x = block_15_ffn_rmsnorm_abs)[name = string("block_15_ffn_rmsnorm_maxval")]; + fp16 block_15_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_15_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_15_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_15_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_15_ffn_rmsnorm_maxval_clipped = clip(alpha = block_15_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_15_ffn_rmsnorm_maxval_clipped_beta_0, x = block_15_ffn_rmsnorm_maxval)[name = string("block_15_ffn_rmsnorm_maxval_clipped")]; + tensor block_15_ffn_rmsnorm_scaled = real_div(x = block_15_residual_1, y = block_15_ffn_rmsnorm_maxval_clipped)[name = string("block_15_ffn_rmsnorm_scaled")]; + tensor block_15_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_15_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_15_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_15_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_15_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_15_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_15_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_15_ffn_rmsnorm_scaled)[name = string("block_15_ffn_rmsnorm_squared_sum")]; + fp16 block_15_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_15_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_15_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_15_ffn_rmsnorm_rsqrt_epsilon_0, x = block_15_ffn_rmsnorm_squared_sum)[name = string("block_15_ffn_rmsnorm_rsqrt")]; + fp16 block_15_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_15_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_15_ffn_rmsnorm_dim_scaled = mul(x = block_15_ffn_rmsnorm_scaled, y = block_15_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_15_ffn_rmsnorm_dim_scaled")]; + tensor block_15_ffn_rmsnorm_normalized = mul(x = block_15_ffn_rmsnorm_dim_scaled, y = block_15_ffn_rmsnorm_rsqrt)[name = string("block_15_ffn_rmsnorm_normalized")]; + tensor block_15_ffn_rmsnorm_y_0 = const()[name = string("block_15_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466880320)))]; + tensor block_15_ffn_rmsnorm = mul(x = block_15_ffn_rmsnorm_normalized, y = block_15_ffn_rmsnorm_y_0)[name = string("block_15_ffn_rmsnorm")]; + tensor block_15_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466882176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470150848))))[name = string("block_15_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_72 = constexpr_blockwise_shift_scale(data = block_15_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470306560))))[name = string("constexpr_blockwise_shift_scale_72")]; + tensor block_15_ffn_inproj_strides_0 = const()[name = string("block_15_ffn_inproj_strides_0"), val = tensor([1])]; + string block_15_ffn_inproj_pad_type_0 = const()[name = string("block_15_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_15_ffn_inproj_pad_0 = const()[name = string("block_15_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_15_ffn_inproj_dilations_0 = const()[name = string("block_15_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_15_ffn_inproj_groups_0 = const()[name = string("block_15_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_15_ffn_inproj = conv(dilations = block_15_ffn_inproj_dilations_0, groups = block_15_ffn_inproj_groups_0, pad = block_15_ffn_inproj_pad_0, pad_type = block_15_ffn_inproj_pad_type_0, strides = block_15_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_72, x = block_15_ffn_rmsnorm)[name = string("block_15_ffn_inproj")]; + tensor block_15_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470316352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473585024))))[name = string("block_15_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_73 = constexpr_blockwise_shift_scale(data = block_15_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473740736))))[name = string("constexpr_blockwise_shift_scale_73")]; + tensor block_15_ffn_g_strides_0 = const()[name = string("block_15_ffn_g_strides_0"), val = tensor([1])]; + string block_15_ffn_g_pad_type_0 = const()[name = string("block_15_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_15_ffn_g_pad_0 = const()[name = string("block_15_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_15_ffn_g_dilations_0 = const()[name = string("block_15_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_15_ffn_g_groups_0 = const()[name = string("block_15_ffn_g_groups_0"), val = int32(1)]; + tensor block_15_ffn_g = conv(dilations = block_15_ffn_g_dilations_0, groups = block_15_ffn_g_groups_0, pad = block_15_ffn_g_pad_0, pad_type = block_15_ffn_g_pad_type_0, strides = block_15_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_73, x = block_15_ffn_rmsnorm)[name = string("block_15_ffn_g")]; + tensor block_15_ffn_g_activation = silu(x = block_15_ffn_g)[name = string("block_15_ffn_g_activation")]; + tensor block_15_ffn_x_gated = mul(x = block_15_ffn_inproj, y = block_15_ffn_g_activation)[name = string("block_15_ffn_x_gated")]; + tensor block_15_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473750528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477019200))))[name = string("block_15_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_74 = constexpr_blockwise_shift_scale(data = block_15_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477047936))))[name = string("constexpr_blockwise_shift_scale_74")]; + tensor block_15_ffn_outproj_strides_0 = const()[name = string("block_15_ffn_outproj_strides_0"), val = tensor([1])]; + string block_15_ffn_outproj_pad_type_0 = const()[name = string("block_15_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_15_ffn_outproj_pad_0 = const()[name = string("block_15_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_15_ffn_outproj_dilations_0 = const()[name = string("block_15_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_15_ffn_outproj_groups_0 = const()[name = string("block_15_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_15_ffn_outproj = conv(dilations = block_15_ffn_outproj_dilations_0, groups = block_15_ffn_outproj_groups_0, pad = block_15_ffn_outproj_pad_0, pad_type = block_15_ffn_outproj_pad_type_0, strides = block_15_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_74, x = block_15_ffn_x_gated)[name = string("block_15_ffn_outproj")]; + tensor block_15_residual_2 = add(x = block_15_ffn_outproj, y = block_15_residual_1)[name = string("block_15_residual_2")]; + tensor block_16_attention_rmsnorm_abs = abs(x = block_15_residual_2)[name = string("block_16_attention_rmsnorm_abs")]; + tensor block_16_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_16_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_16_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_16_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_16_attention_rmsnorm_maxval = reduce_max(axes = block_16_attention_rmsnorm_maxval_axes_0, keep_dims = block_16_attention_rmsnorm_maxval_keep_dims_0, x = block_16_attention_rmsnorm_abs)[name = string("block_16_attention_rmsnorm_maxval")]; + fp16 block_16_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_16_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_16_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_16_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_16_attention_rmsnorm_maxval_clipped = clip(alpha = block_16_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_16_attention_rmsnorm_maxval_clipped_beta_0, x = block_16_attention_rmsnorm_maxval)[name = string("block_16_attention_rmsnorm_maxval_clipped")]; + tensor block_16_attention_rmsnorm_scaled = real_div(x = block_15_residual_2, y = block_16_attention_rmsnorm_maxval_clipped)[name = string("block_16_attention_rmsnorm_scaled")]; + tensor block_16_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_16_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_16_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_16_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_16_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_16_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_16_attention_rmsnorm_squared_sum_keep_dims_0, x = block_16_attention_rmsnorm_scaled)[name = string("block_16_attention_rmsnorm_squared_sum")]; + fp16 block_16_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_16_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_16_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_16_attention_rmsnorm_rsqrt_epsilon_0, x = block_16_attention_rmsnorm_squared_sum)[name = string("block_16_attention_rmsnorm_rsqrt")]; + fp16 block_16_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_16_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_16_attention_rmsnorm_dim_scaled = mul(x = block_16_attention_rmsnorm_scaled, y = block_16_attention_rmsnorm_dim_scaled_y_0)[name = string("block_16_attention_rmsnorm_dim_scaled")]; + tensor block_16_attention_rmsnorm_normalized = mul(x = block_16_attention_rmsnorm_dim_scaled, y = block_16_attention_rmsnorm_rsqrt)[name = string("block_16_attention_rmsnorm_normalized")]; + tensor block_16_attention_rmsnorm_y_0 = const()[name = string("block_16_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477049792)))]; + tensor block_16_attention_rmsnorm = mul(x = block_16_attention_rmsnorm_normalized, y = block_16_attention_rmsnorm_y_0)[name = string("block_16_attention_rmsnorm")]; + tensor attention_16_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477051648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477825856))))[name = string("attention_16_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_75 = constexpr_blockwise_shift_scale(data = attention_16_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477862784))))[name = string("constexpr_blockwise_shift_scale_75")]; + tensor attention_16_qkvproj_bias_0 = const()[name = string("attention_16_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477865152)))]; + tensor attention_16_qkvproj_strides_0 = const()[name = string("attention_16_qkvproj_strides_0"), val = tensor([1])]; + string attention_16_qkvproj_pad_type_0 = const()[name = string("attention_16_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_16_qkvproj_pad_0 = const()[name = string("attention_16_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_16_qkvproj_dilations_0 = const()[name = string("attention_16_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_16_qkvproj_groups_0 = const()[name = string("attention_16_qkvproj_groups_0"), val = int32(1)]; + tensor attention_16_qkvproj = conv(bias = attention_16_qkvproj_bias_0, dilations = attention_16_qkvproj_dilations_0, groups = attention_16_qkvproj_groups_0, pad = attention_16_qkvproj_pad_0, pad_type = attention_16_qkvproj_pad_type_0, strides = attention_16_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_75, x = block_16_attention_rmsnorm)[name = string("attention_16_qkvproj")]; + tensor attention_16_head_reshape_shape_0 = const()[name = string("attention_16_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_16_head_reshape = reshape(shape = attention_16_head_reshape_shape_0, x = attention_16_qkvproj)[name = string("attention_16_head_reshape")]; + tensor attention_16_head_transpose_perm_0 = const()[name = string("attention_16_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_16_split_qkv_heads_axis_0 = const()[name = string("attention_16_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_16_split_qkv_heads_split_sizes_0 = const()[name = string("attention_16_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_16_head_transpose = transpose(perm = attention_16_head_transpose_perm_0, x = attention_16_head_reshape)[name = string("transpose_16")]; + tensor attention_16_split_qkv_heads_0, tensor attention_16_split_qkv_heads_1, tensor attention_16_split_qkv_heads_2 = split(axis = attention_16_split_qkv_heads_axis_0, split_sizes = attention_16_split_qkv_heads_split_sizes_0, x = attention_16_head_transpose)[name = string("attention_16_split_qkv_heads")]; + tensor attention_16_q_rope_lhs_mult = mul(x = attention_16_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_16_q_rope_lhs_mult")]; + int32 attention_16_q_rotate_half_split_num_splits_0 = const()[name = string("attention_16_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_16_q_rotate_half_split_axis_0 = const()[name = string("attention_16_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_16_q_rotate_half_split_0, tensor attention_16_q_rotate_half_split_1 = split(axis = attention_16_q_rotate_half_split_axis_0, num_splits = attention_16_q_rotate_half_split_num_splits_0, x = attention_16_split_qkv_heads_0)[name = string("attention_16_q_rotate_half_split")]; + fp16 attention_16_q_rotate_half_neg_y_0 = const()[name = string("attention_16_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_16_q_rotate_half_neg = mul(x = attention_16_q_rotate_half_split_1, y = attention_16_q_rotate_half_neg_y_0)[name = string("attention_16_q_rotate_half_neg")]; + int32 attention_16_q_rotate_half_concat_axis_0 = const()[name = string("attention_16_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_16_q_rotate_half_concat_interleave_0 = const()[name = string("attention_16_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_16_q_rotate_half_concat = concat(axis = attention_16_q_rotate_half_concat_axis_0, interleave = attention_16_q_rotate_half_concat_interleave_0, values = (attention_16_q_rotate_half_neg, attention_16_q_rotate_half_split_0))[name = string("attention_16_q_rotate_half_concat")]; + tensor attention_16_q_rope_rhs_mult = mul(x = attention_16_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_16_q_rope_rhs_mult")]; + tensor attention_16_q_rope = add(x = attention_16_q_rope_lhs_mult, y = attention_16_q_rope_rhs_mult)[name = string("attention_16_q_rope")]; + tensor attention_16_k_rope_lhs_mult = mul(x = attention_16_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_16_k_rope_lhs_mult")]; + int32 attention_16_k_rotate_half_split_num_splits_0 = const()[name = string("attention_16_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_16_k_rotate_half_split_axis_0 = const()[name = string("attention_16_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_16_k_rotate_half_split_0, tensor attention_16_k_rotate_half_split_1 = split(axis = attention_16_k_rotate_half_split_axis_0, num_splits = attention_16_k_rotate_half_split_num_splits_0, x = attention_16_split_qkv_heads_1)[name = string("attention_16_k_rotate_half_split")]; + fp16 attention_16_k_rotate_half_neg_y_0 = const()[name = string("attention_16_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_16_k_rotate_half_neg = mul(x = attention_16_k_rotate_half_split_1, y = attention_16_k_rotate_half_neg_y_0)[name = string("attention_16_k_rotate_half_neg")]; + int32 attention_16_k_rotate_half_concat_axis_0 = const()[name = string("attention_16_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_16_k_rotate_half_concat_interleave_0 = const()[name = string("attention_16_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_16_k_rotate_half_concat = concat(axis = attention_16_k_rotate_half_concat_axis_0, interleave = attention_16_k_rotate_half_concat_interleave_0, values = (attention_16_k_rotate_half_neg, attention_16_k_rotate_half_split_0))[name = string("attention_16_k_rotate_half_concat")]; + tensor attention_16_k_rope_rhs_mult = mul(x = attention_16_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_16_k_rope_rhs_mult")]; + tensor attention_16_k_rope = add(x = attention_16_k_rope_lhs_mult, y = attention_16_k_rope_rhs_mult)[name = string("attention_16_k_rope")]; + int32 attention_16_q_splits_axis_0 = const()[name = string("attention_16_q_splits_axis_0"), val = int32(1)]; + int32 attention_16_q_splits_num_splits_0 = const()[name = string("attention_16_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_16_q_splits_0, tensor attention_16_q_splits_1 = split(axis = attention_16_q_splits_axis_0, num_splits = attention_16_q_splits_num_splits_0, x = attention_16_q_rope)[name = string("attention_16_q_splits")]; + tensor attention_16_update_begin_0_values0_0 = const()[name = string("attention_16_update_begin_0_values0_0"), val = tensor([16])]; + tensor attention_16_update_begin_0_values1_0 = const()[name = string("attention_16_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_16_update_begin_0_values3_0 = const()[name = string("attention_16_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_16_update_begin_0_axis_0 = const()[name = string("attention_16_update_begin_0_axis_0"), val = int32(0)]; + bool attention_16_update_begin_0_interleave_0 = const()[name = string("attention_16_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_16_update_begin_0 = concat(axis = attention_16_update_begin_0_axis_0, interleave = attention_16_update_begin_0_interleave_0, values = (attention_16_update_begin_0_values0_0, attention_16_update_begin_0_values1_0, query_pos1, attention_16_update_begin_0_values3_0))[name = string("attention_16_update_begin_0")]; + tensor attention_16_update_end_0_values0_0 = const()[name = string("attention_16_update_end_0_values0_0"), val = tensor([17])]; + tensor attention_16_update_end_0_values1_0 = const()[name = string("attention_16_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_16_update_end_0_values3_0 = const()[name = string("attention_16_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_16_update_end_0_axis_0 = const()[name = string("attention_16_update_end_0_axis_0"), val = int32(0)]; + bool attention_16_update_end_0_interleave_0 = const()[name = string("attention_16_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_16_update_end_0 = concat(axis = attention_16_update_end_0_axis_0, interleave = attention_16_update_end_0_interleave_0, values = (attention_16_update_end_0_values0_0, attention_16_update_end_0_values1_0, end_pos_0, attention_16_update_end_0_values3_0))[name = string("attention_16_update_end_0")]; + tensor attention_16_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_16_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_16_updated_key_cache_0 = slice_update(begin = attention_16_update_begin_0, end = attention_16_update_end_0, squeeze_mask = attention_16_updated_key_cache_0_squeeze_mask_0, update = attention_16_k_rope, x = coreml_update_state_30)[name = string("attention_16_updated_key_cache_0")]; + write_state(data = attention_16_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_80_write_state")]; + tensor coreml_update_state_32 = read_state(input = key_cache_state)[name = string("coreml_update_state_80")]; + tensor attention_16_key_cache_begin_0 = const()[name = string("attention_16_key_cache_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor attention_16_key_cache_end_0 = const()[name = string("attention_16_key_cache_end_0"), val = tensor([17, 2, 512, 64])]; + tensor attention_16_key_cache_squeeze_mask_0 = const()[name = string("attention_16_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_16_key_cache = slice_by_index(begin = attention_16_key_cache_begin_0, end = attention_16_key_cache_end_0, squeeze_mask = attention_16_key_cache_squeeze_mask_0, x = coreml_update_state_32)[name = string("attention_16_key_cache")]; + int32 attention_16_key_cache_head_axis_0 = const()[name = string("attention_16_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_16_key_cache_head_num_splits_0 = const()[name = string("attention_16_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_16_key_cache_head_0, tensor attention_16_key_cache_head_1 = split(axis = attention_16_key_cache_head_axis_0, num_splits = attention_16_key_cache_head_num_splits_0, x = attention_16_key_cache)[name = string("attention_16_key_cache_head")]; + tensor attention_16_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_16_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_16_updated_value_cache_0 = slice_update(begin = attention_16_update_begin_0, end = attention_16_update_end_0, squeeze_mask = attention_16_updated_value_cache_0_squeeze_mask_0, update = attention_16_split_qkv_heads_2, x = coreml_update_state_31)[name = string("attention_16_updated_value_cache_0")]; + write_state(data = attention_16_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_81_write_state")]; + tensor coreml_update_state_33 = read_state(input = value_cache_state)[name = string("coreml_update_state_81")]; + tensor attention_16_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_16_slice_current_layer_value_cache_begin_0"), val = tensor([16, 0, 0, 0])]; + tensor attention_16_slice_current_layer_value_cache_end_0 = const()[name = string("attention_16_slice_current_layer_value_cache_end_0"), val = tensor([17, 2, 512, 64])]; + tensor attention_16_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_16_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_16_slice_current_layer_value_cache = slice_by_index(begin = attention_16_slice_current_layer_value_cache_begin_0, end = attention_16_slice_current_layer_value_cache_end_0, squeeze_mask = attention_16_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_33)[name = string("attention_16_slice_current_layer_value_cache")]; + int32 attention_16_slice_value_cache_heads_axis_0 = const()[name = string("attention_16_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_16_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_16_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_16_slice_value_cache_heads_0, tensor attention_16_slice_value_cache_heads_1 = split(axis = attention_16_slice_value_cache_heads_axis_0, num_splits = attention_16_slice_value_cache_heads_num_splits_0, x = attention_16_slice_current_layer_value_cache)[name = string("attention_16_slice_value_cache_heads")]; + bool attention_16_scores_0_transpose_y_0 = const()[name = string("attention_16_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_16_scores_0_transpose_x_0 = const()[name = string("attention_16_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_16_scores_0 = matmul(transpose_x = attention_16_scores_0_transpose_x_0, transpose_y = attention_16_scores_0_transpose_y_0, x = attention_16_key_cache_head_0, y = attention_16_q_splits_0)[name = string("attention_16_scores_0")]; + fp16 attention_16_scaled_scores_0_y_0 = const()[name = string("attention_16_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_16_scaled_scores_0 = mul(x = attention_16_scores_0, y = attention_16_scaled_scores_0_y_0)[name = string("attention_16_scaled_scores_0")]; + tensor attention_16_masked_scaled_scores_0 = add(x = attention_16_scaled_scores_0, y = transpose_0)[name = string("attention_16_masked_scaled_scores_0")]; + int32 softmax_32_axis_0 = const()[name = string("softmax_32_axis_0"), val = int32(-2)]; + tensor softmax_32 = softmax(axis = softmax_32_axis_0, x = attention_16_masked_scaled_scores_0)[name = string("softmax_32")]; + bool attention_16_attention_0_transpose_x_0 = const()[name = string("attention_16_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_16_attention_0_transpose_y_0 = const()[name = string("attention_16_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_16_attention_0 = matmul(transpose_x = attention_16_attention_0_transpose_x_0, transpose_y = attention_16_attention_0_transpose_y_0, x = softmax_32, y = attention_16_slice_value_cache_heads_0)[name = string("attention_16_attention_0")]; + bool attention_16_scores_1_transpose_y_0 = const()[name = string("attention_16_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_16_scores_1_transpose_x_0 = const()[name = string("attention_16_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_16_scores_1 = matmul(transpose_x = attention_16_scores_1_transpose_x_0, transpose_y = attention_16_scores_1_transpose_y_0, x = attention_16_key_cache_head_1, y = attention_16_q_splits_1)[name = string("attention_16_scores_1")]; + fp16 attention_16_scaled_scores_1_y_0 = const()[name = string("attention_16_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_16_scaled_scores_1 = mul(x = attention_16_scores_1, y = attention_16_scaled_scores_1_y_0)[name = string("attention_16_scaled_scores_1")]; + tensor attention_16_masked_scaled_scores_1 = add(x = attention_16_scaled_scores_1, y = transpose_0)[name = string("attention_16_masked_scaled_scores_1")]; + int32 softmax_33_axis_0 = const()[name = string("softmax_33_axis_0"), val = int32(-2)]; + tensor softmax_33 = softmax(axis = softmax_33_axis_0, x = attention_16_masked_scaled_scores_1)[name = string("softmax_33")]; + bool attention_16_attention_1_transpose_x_0 = const()[name = string("attention_16_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_16_attention_1_transpose_y_0 = const()[name = string("attention_16_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_16_attention_1 = matmul(transpose_x = attention_16_attention_1_transpose_x_0, transpose_y = attention_16_attention_1_transpose_y_0, x = softmax_33, y = attention_16_slice_value_cache_heads_1)[name = string("attention_16_attention_1")]; + int32 attention_16_concat_attention_all_heads_axis_0 = const()[name = string("attention_16_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_16_concat_attention_all_heads_interleave_0 = const()[name = string("attention_16_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_16_concat_attention_all_heads = concat(axis = attention_16_concat_attention_all_heads_axis_0, interleave = attention_16_concat_attention_all_heads_interleave_0, values = (attention_16_attention_0, attention_16_attention_1))[name = string("attention_16_concat_attention_all_heads")]; + tensor attention_16_channels_first_retransposed_perm_0 = const()[name = string("attention_16_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_16_reshaped_shape_0 = const()[name = string("attention_16_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_16_channels_first_retransposed = transpose(perm = attention_16_channels_first_retransposed_perm_0, x = attention_16_concat_attention_all_heads)[name = string("transpose_15")]; + tensor attention_16_reshaped = reshape(shape = attention_16_reshaped_shape_0, x = attention_16_channels_first_retransposed)[name = string("attention_16_reshaped")]; + tensor attention_16_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477867520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478469696))))[name = string("attention_16_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_76 = constexpr_blockwise_shift_scale(data = attention_16_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478498432))))[name = string("constexpr_blockwise_shift_scale_76")]; + tensor attention_16_outproj_strides_0 = const()[name = string("attention_16_outproj_strides_0"), val = tensor([1])]; + string attention_16_outproj_pad_type_0 = const()[name = string("attention_16_outproj_pad_type_0"), val = string("valid")]; + tensor attention_16_outproj_pad_0 = const()[name = string("attention_16_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_16_outproj_dilations_0 = const()[name = string("attention_16_outproj_dilations_0"), val = tensor([1])]; + int32 attention_16_outproj_groups_0 = const()[name = string("attention_16_outproj_groups_0"), val = int32(1)]; + tensor attention_16_outproj = conv(dilations = attention_16_outproj_dilations_0, groups = attention_16_outproj_groups_0, pad = attention_16_outproj_pad_0, pad_type = attention_16_outproj_pad_type_0, strides = attention_16_outproj_strides_0, weight = constexpr_blockwise_shift_scale_76, x = attention_16_reshaped)[name = string("attention_16_outproj")]; + tensor block_16_residual_1 = add(x = block_15_residual_2, y = attention_16_outproj)[name = string("block_16_residual_1")]; + tensor block_16_ffn_rmsnorm_abs = abs(x = block_16_residual_1)[name = string("block_16_ffn_rmsnorm_abs")]; + tensor block_16_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_16_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_16_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_16_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_16_ffn_rmsnorm_maxval = reduce_max(axes = block_16_ffn_rmsnorm_maxval_axes_0, keep_dims = block_16_ffn_rmsnorm_maxval_keep_dims_0, x = block_16_ffn_rmsnorm_abs)[name = string("block_16_ffn_rmsnorm_maxval")]; + fp16 block_16_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_16_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_16_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_16_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_16_ffn_rmsnorm_maxval_clipped = clip(alpha = block_16_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_16_ffn_rmsnorm_maxval_clipped_beta_0, x = block_16_ffn_rmsnorm_maxval)[name = string("block_16_ffn_rmsnorm_maxval_clipped")]; + tensor block_16_ffn_rmsnorm_scaled = real_div(x = block_16_residual_1, y = block_16_ffn_rmsnorm_maxval_clipped)[name = string("block_16_ffn_rmsnorm_scaled")]; + tensor block_16_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_16_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_16_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_16_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_16_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_16_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_16_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_16_ffn_rmsnorm_scaled)[name = string("block_16_ffn_rmsnorm_squared_sum")]; + fp16 block_16_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_16_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_16_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_16_ffn_rmsnorm_rsqrt_epsilon_0, x = block_16_ffn_rmsnorm_squared_sum)[name = string("block_16_ffn_rmsnorm_rsqrt")]; + fp16 block_16_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_16_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_16_ffn_rmsnorm_dim_scaled = mul(x = block_16_ffn_rmsnorm_scaled, y = block_16_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_16_ffn_rmsnorm_dim_scaled")]; + tensor block_16_ffn_rmsnorm_normalized = mul(x = block_16_ffn_rmsnorm_dim_scaled, y = block_16_ffn_rmsnorm_rsqrt)[name = string("block_16_ffn_rmsnorm_normalized")]; + tensor block_16_ffn_rmsnorm_y_0 = const()[name = string("block_16_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478500288)))]; + tensor block_16_ffn_rmsnorm = mul(x = block_16_ffn_rmsnorm_normalized, y = block_16_ffn_rmsnorm_y_0)[name = string("block_16_ffn_rmsnorm")]; + tensor block_16_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478502144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481770816))))[name = string("block_16_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_77 = constexpr_blockwise_shift_scale(data = block_16_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481926528))))[name = string("constexpr_blockwise_shift_scale_77")]; + tensor block_16_ffn_inproj_strides_0 = const()[name = string("block_16_ffn_inproj_strides_0"), val = tensor([1])]; + string block_16_ffn_inproj_pad_type_0 = const()[name = string("block_16_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_16_ffn_inproj_pad_0 = const()[name = string("block_16_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_16_ffn_inproj_dilations_0 = const()[name = string("block_16_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_16_ffn_inproj_groups_0 = const()[name = string("block_16_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_16_ffn_inproj = conv(dilations = block_16_ffn_inproj_dilations_0, groups = block_16_ffn_inproj_groups_0, pad = block_16_ffn_inproj_pad_0, pad_type = block_16_ffn_inproj_pad_type_0, strides = block_16_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_77, x = block_16_ffn_rmsnorm)[name = string("block_16_ffn_inproj")]; + tensor block_16_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481936320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485204992))))[name = string("block_16_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_78 = constexpr_blockwise_shift_scale(data = block_16_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485360704))))[name = string("constexpr_blockwise_shift_scale_78")]; + tensor block_16_ffn_g_strides_0 = const()[name = string("block_16_ffn_g_strides_0"), val = tensor([1])]; + string block_16_ffn_g_pad_type_0 = const()[name = string("block_16_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_16_ffn_g_pad_0 = const()[name = string("block_16_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_16_ffn_g_dilations_0 = const()[name = string("block_16_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_16_ffn_g_groups_0 = const()[name = string("block_16_ffn_g_groups_0"), val = int32(1)]; + tensor block_16_ffn_g = conv(dilations = block_16_ffn_g_dilations_0, groups = block_16_ffn_g_groups_0, pad = block_16_ffn_g_pad_0, pad_type = block_16_ffn_g_pad_type_0, strides = block_16_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_78, x = block_16_ffn_rmsnorm)[name = string("block_16_ffn_g")]; + tensor block_16_ffn_g_activation = silu(x = block_16_ffn_g)[name = string("block_16_ffn_g_activation")]; + tensor block_16_ffn_x_gated = mul(x = block_16_ffn_inproj, y = block_16_ffn_g_activation)[name = string("block_16_ffn_x_gated")]; + tensor block_16_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485370496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488639168))))[name = string("block_16_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_79 = constexpr_blockwise_shift_scale(data = block_16_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488667904))))[name = string("constexpr_blockwise_shift_scale_79")]; + tensor block_16_ffn_outproj_strides_0 = const()[name = string("block_16_ffn_outproj_strides_0"), val = tensor([1])]; + string block_16_ffn_outproj_pad_type_0 = const()[name = string("block_16_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_16_ffn_outproj_pad_0 = const()[name = string("block_16_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_16_ffn_outproj_dilations_0 = const()[name = string("block_16_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_16_ffn_outproj_groups_0 = const()[name = string("block_16_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_16_ffn_outproj = conv(dilations = block_16_ffn_outproj_dilations_0, groups = block_16_ffn_outproj_groups_0, pad = block_16_ffn_outproj_pad_0, pad_type = block_16_ffn_outproj_pad_type_0, strides = block_16_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_79, x = block_16_ffn_x_gated)[name = string("block_16_ffn_outproj")]; + tensor block_16_residual_2 = add(x = block_16_ffn_outproj, y = block_16_residual_1)[name = string("block_16_residual_2")]; + tensor block_17_attention_rmsnorm_abs = abs(x = block_16_residual_2)[name = string("block_17_attention_rmsnorm_abs")]; + tensor block_17_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_17_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_17_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_17_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_17_attention_rmsnorm_maxval = reduce_max(axes = block_17_attention_rmsnorm_maxval_axes_0, keep_dims = block_17_attention_rmsnorm_maxval_keep_dims_0, x = block_17_attention_rmsnorm_abs)[name = string("block_17_attention_rmsnorm_maxval")]; + fp16 block_17_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_17_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_17_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_17_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_17_attention_rmsnorm_maxval_clipped = clip(alpha = block_17_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_17_attention_rmsnorm_maxval_clipped_beta_0, x = block_17_attention_rmsnorm_maxval)[name = string("block_17_attention_rmsnorm_maxval_clipped")]; + tensor block_17_attention_rmsnorm_scaled = real_div(x = block_16_residual_2, y = block_17_attention_rmsnorm_maxval_clipped)[name = string("block_17_attention_rmsnorm_scaled")]; + tensor block_17_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_17_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_17_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_17_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_17_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_17_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_17_attention_rmsnorm_squared_sum_keep_dims_0, x = block_17_attention_rmsnorm_scaled)[name = string("block_17_attention_rmsnorm_squared_sum")]; + fp16 block_17_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_17_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_17_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_17_attention_rmsnorm_rsqrt_epsilon_0, x = block_17_attention_rmsnorm_squared_sum)[name = string("block_17_attention_rmsnorm_rsqrt")]; + fp16 block_17_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_17_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_17_attention_rmsnorm_dim_scaled = mul(x = block_17_attention_rmsnorm_scaled, y = block_17_attention_rmsnorm_dim_scaled_y_0)[name = string("block_17_attention_rmsnorm_dim_scaled")]; + tensor block_17_attention_rmsnorm_normalized = mul(x = block_17_attention_rmsnorm_dim_scaled, y = block_17_attention_rmsnorm_rsqrt)[name = string("block_17_attention_rmsnorm_normalized")]; + tensor block_17_attention_rmsnorm_y_0 = const()[name = string("block_17_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488669760)))]; + tensor block_17_attention_rmsnorm = mul(x = block_17_attention_rmsnorm_normalized, y = block_17_attention_rmsnorm_y_0)[name = string("block_17_attention_rmsnorm")]; + tensor attention_17_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488671616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489445824))))[name = string("attention_17_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_80 = constexpr_blockwise_shift_scale(data = attention_17_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489482752))))[name = string("constexpr_blockwise_shift_scale_80")]; + tensor attention_17_qkvproj_bias_0 = const()[name = string("attention_17_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489485120)))]; + tensor attention_17_qkvproj_strides_0 = const()[name = string("attention_17_qkvproj_strides_0"), val = tensor([1])]; + string attention_17_qkvproj_pad_type_0 = const()[name = string("attention_17_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_17_qkvproj_pad_0 = const()[name = string("attention_17_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_17_qkvproj_dilations_0 = const()[name = string("attention_17_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_17_qkvproj_groups_0 = const()[name = string("attention_17_qkvproj_groups_0"), val = int32(1)]; + tensor attention_17_qkvproj = conv(bias = attention_17_qkvproj_bias_0, dilations = attention_17_qkvproj_dilations_0, groups = attention_17_qkvproj_groups_0, pad = attention_17_qkvproj_pad_0, pad_type = attention_17_qkvproj_pad_type_0, strides = attention_17_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_80, x = block_17_attention_rmsnorm)[name = string("attention_17_qkvproj")]; + tensor attention_17_head_reshape_shape_0 = const()[name = string("attention_17_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_17_head_reshape = reshape(shape = attention_17_head_reshape_shape_0, x = attention_17_qkvproj)[name = string("attention_17_head_reshape")]; + tensor attention_17_head_transpose_perm_0 = const()[name = string("attention_17_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_17_split_qkv_heads_axis_0 = const()[name = string("attention_17_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_17_split_qkv_heads_split_sizes_0 = const()[name = string("attention_17_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_17_head_transpose = transpose(perm = attention_17_head_transpose_perm_0, x = attention_17_head_reshape)[name = string("transpose_14")]; + tensor attention_17_split_qkv_heads_0, tensor attention_17_split_qkv_heads_1, tensor attention_17_split_qkv_heads_2 = split(axis = attention_17_split_qkv_heads_axis_0, split_sizes = attention_17_split_qkv_heads_split_sizes_0, x = attention_17_head_transpose)[name = string("attention_17_split_qkv_heads")]; + tensor attention_17_q_rope_lhs_mult = mul(x = attention_17_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_17_q_rope_lhs_mult")]; + int32 attention_17_q_rotate_half_split_num_splits_0 = const()[name = string("attention_17_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_17_q_rotate_half_split_axis_0 = const()[name = string("attention_17_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_17_q_rotate_half_split_0, tensor attention_17_q_rotate_half_split_1 = split(axis = attention_17_q_rotate_half_split_axis_0, num_splits = attention_17_q_rotate_half_split_num_splits_0, x = attention_17_split_qkv_heads_0)[name = string("attention_17_q_rotate_half_split")]; + fp16 attention_17_q_rotate_half_neg_y_0 = const()[name = string("attention_17_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_17_q_rotate_half_neg = mul(x = attention_17_q_rotate_half_split_1, y = attention_17_q_rotate_half_neg_y_0)[name = string("attention_17_q_rotate_half_neg")]; + int32 attention_17_q_rotate_half_concat_axis_0 = const()[name = string("attention_17_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_17_q_rotate_half_concat_interleave_0 = const()[name = string("attention_17_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_17_q_rotate_half_concat = concat(axis = attention_17_q_rotate_half_concat_axis_0, interleave = attention_17_q_rotate_half_concat_interleave_0, values = (attention_17_q_rotate_half_neg, attention_17_q_rotate_half_split_0))[name = string("attention_17_q_rotate_half_concat")]; + tensor attention_17_q_rope_rhs_mult = mul(x = attention_17_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_17_q_rope_rhs_mult")]; + tensor attention_17_q_rope = add(x = attention_17_q_rope_lhs_mult, y = attention_17_q_rope_rhs_mult)[name = string("attention_17_q_rope")]; + tensor attention_17_k_rope_lhs_mult = mul(x = attention_17_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_17_k_rope_lhs_mult")]; + int32 attention_17_k_rotate_half_split_num_splits_0 = const()[name = string("attention_17_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_17_k_rotate_half_split_axis_0 = const()[name = string("attention_17_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_17_k_rotate_half_split_0, tensor attention_17_k_rotate_half_split_1 = split(axis = attention_17_k_rotate_half_split_axis_0, num_splits = attention_17_k_rotate_half_split_num_splits_0, x = attention_17_split_qkv_heads_1)[name = string("attention_17_k_rotate_half_split")]; + fp16 attention_17_k_rotate_half_neg_y_0 = const()[name = string("attention_17_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_17_k_rotate_half_neg = mul(x = attention_17_k_rotate_half_split_1, y = attention_17_k_rotate_half_neg_y_0)[name = string("attention_17_k_rotate_half_neg")]; + int32 attention_17_k_rotate_half_concat_axis_0 = const()[name = string("attention_17_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_17_k_rotate_half_concat_interleave_0 = const()[name = string("attention_17_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_17_k_rotate_half_concat = concat(axis = attention_17_k_rotate_half_concat_axis_0, interleave = attention_17_k_rotate_half_concat_interleave_0, values = (attention_17_k_rotate_half_neg, attention_17_k_rotate_half_split_0))[name = string("attention_17_k_rotate_half_concat")]; + tensor attention_17_k_rope_rhs_mult = mul(x = attention_17_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_17_k_rope_rhs_mult")]; + tensor attention_17_k_rope = add(x = attention_17_k_rope_lhs_mult, y = attention_17_k_rope_rhs_mult)[name = string("attention_17_k_rope")]; + int32 attention_17_q_splits_axis_0 = const()[name = string("attention_17_q_splits_axis_0"), val = int32(1)]; + int32 attention_17_q_splits_num_splits_0 = const()[name = string("attention_17_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_17_q_splits_0, tensor attention_17_q_splits_1 = split(axis = attention_17_q_splits_axis_0, num_splits = attention_17_q_splits_num_splits_0, x = attention_17_q_rope)[name = string("attention_17_q_splits")]; + tensor attention_17_update_begin_0_values0_0 = const()[name = string("attention_17_update_begin_0_values0_0"), val = tensor([17])]; + tensor attention_17_update_begin_0_values1_0 = const()[name = string("attention_17_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_17_update_begin_0_values3_0 = const()[name = string("attention_17_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_17_update_begin_0_axis_0 = const()[name = string("attention_17_update_begin_0_axis_0"), val = int32(0)]; + bool attention_17_update_begin_0_interleave_0 = const()[name = string("attention_17_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_17_update_begin_0 = concat(axis = attention_17_update_begin_0_axis_0, interleave = attention_17_update_begin_0_interleave_0, values = (attention_17_update_begin_0_values0_0, attention_17_update_begin_0_values1_0, query_pos1, attention_17_update_begin_0_values3_0))[name = string("attention_17_update_begin_0")]; + tensor attention_17_update_end_0_values0_0 = const()[name = string("attention_17_update_end_0_values0_0"), val = tensor([18])]; + tensor attention_17_update_end_0_values1_0 = const()[name = string("attention_17_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_17_update_end_0_values3_0 = const()[name = string("attention_17_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_17_update_end_0_axis_0 = const()[name = string("attention_17_update_end_0_axis_0"), val = int32(0)]; + bool attention_17_update_end_0_interleave_0 = const()[name = string("attention_17_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_17_update_end_0 = concat(axis = attention_17_update_end_0_axis_0, interleave = attention_17_update_end_0_interleave_0, values = (attention_17_update_end_0_values0_0, attention_17_update_end_0_values1_0, end_pos_0, attention_17_update_end_0_values3_0))[name = string("attention_17_update_end_0")]; + tensor attention_17_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_17_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_17_updated_key_cache_0 = slice_update(begin = attention_17_update_begin_0, end = attention_17_update_end_0, squeeze_mask = attention_17_updated_key_cache_0_squeeze_mask_0, update = attention_17_k_rope, x = coreml_update_state_32)[name = string("attention_17_updated_key_cache_0")]; + write_state(data = attention_17_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_82_write_state")]; + tensor coreml_update_state_34 = read_state(input = key_cache_state)[name = string("coreml_update_state_82")]; + tensor attention_17_key_cache_begin_0 = const()[name = string("attention_17_key_cache_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor attention_17_key_cache_end_0 = const()[name = string("attention_17_key_cache_end_0"), val = tensor([18, 2, 512, 64])]; + tensor attention_17_key_cache_squeeze_mask_0 = const()[name = string("attention_17_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_17_key_cache = slice_by_index(begin = attention_17_key_cache_begin_0, end = attention_17_key_cache_end_0, squeeze_mask = attention_17_key_cache_squeeze_mask_0, x = coreml_update_state_34)[name = string("attention_17_key_cache")]; + int32 attention_17_key_cache_head_axis_0 = const()[name = string("attention_17_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_17_key_cache_head_num_splits_0 = const()[name = string("attention_17_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_17_key_cache_head_0, tensor attention_17_key_cache_head_1 = split(axis = attention_17_key_cache_head_axis_0, num_splits = attention_17_key_cache_head_num_splits_0, x = attention_17_key_cache)[name = string("attention_17_key_cache_head")]; + tensor attention_17_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_17_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_17_updated_value_cache_0 = slice_update(begin = attention_17_update_begin_0, end = attention_17_update_end_0, squeeze_mask = attention_17_updated_value_cache_0_squeeze_mask_0, update = attention_17_split_qkv_heads_2, x = coreml_update_state_33)[name = string("attention_17_updated_value_cache_0")]; + write_state(data = attention_17_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_83_write_state")]; + tensor coreml_update_state_35 = read_state(input = value_cache_state)[name = string("coreml_update_state_83")]; + tensor attention_17_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_17_slice_current_layer_value_cache_begin_0"), val = tensor([17, 0, 0, 0])]; + tensor attention_17_slice_current_layer_value_cache_end_0 = const()[name = string("attention_17_slice_current_layer_value_cache_end_0"), val = tensor([18, 2, 512, 64])]; + tensor attention_17_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_17_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_17_slice_current_layer_value_cache = slice_by_index(begin = attention_17_slice_current_layer_value_cache_begin_0, end = attention_17_slice_current_layer_value_cache_end_0, squeeze_mask = attention_17_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_35)[name = string("attention_17_slice_current_layer_value_cache")]; + int32 attention_17_slice_value_cache_heads_axis_0 = const()[name = string("attention_17_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_17_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_17_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_17_slice_value_cache_heads_0, tensor attention_17_slice_value_cache_heads_1 = split(axis = attention_17_slice_value_cache_heads_axis_0, num_splits = attention_17_slice_value_cache_heads_num_splits_0, x = attention_17_slice_current_layer_value_cache)[name = string("attention_17_slice_value_cache_heads")]; + bool attention_17_scores_0_transpose_y_0 = const()[name = string("attention_17_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_17_scores_0_transpose_x_0 = const()[name = string("attention_17_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_17_scores_0 = matmul(transpose_x = attention_17_scores_0_transpose_x_0, transpose_y = attention_17_scores_0_transpose_y_0, x = attention_17_key_cache_head_0, y = attention_17_q_splits_0)[name = string("attention_17_scores_0")]; + fp16 attention_17_scaled_scores_0_y_0 = const()[name = string("attention_17_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_17_scaled_scores_0 = mul(x = attention_17_scores_0, y = attention_17_scaled_scores_0_y_0)[name = string("attention_17_scaled_scores_0")]; + tensor attention_17_masked_scaled_scores_0 = add(x = attention_17_scaled_scores_0, y = transpose_0)[name = string("attention_17_masked_scaled_scores_0")]; + int32 softmax_34_axis_0 = const()[name = string("softmax_34_axis_0"), val = int32(-2)]; + tensor softmax_34 = softmax(axis = softmax_34_axis_0, x = attention_17_masked_scaled_scores_0)[name = string("softmax_34")]; + bool attention_17_attention_0_transpose_x_0 = const()[name = string("attention_17_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_17_attention_0_transpose_y_0 = const()[name = string("attention_17_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_17_attention_0 = matmul(transpose_x = attention_17_attention_0_transpose_x_0, transpose_y = attention_17_attention_0_transpose_y_0, x = softmax_34, y = attention_17_slice_value_cache_heads_0)[name = string("attention_17_attention_0")]; + bool attention_17_scores_1_transpose_y_0 = const()[name = string("attention_17_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_17_scores_1_transpose_x_0 = const()[name = string("attention_17_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_17_scores_1 = matmul(transpose_x = attention_17_scores_1_transpose_x_0, transpose_y = attention_17_scores_1_transpose_y_0, x = attention_17_key_cache_head_1, y = attention_17_q_splits_1)[name = string("attention_17_scores_1")]; + fp16 attention_17_scaled_scores_1_y_0 = const()[name = string("attention_17_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_17_scaled_scores_1 = mul(x = attention_17_scores_1, y = attention_17_scaled_scores_1_y_0)[name = string("attention_17_scaled_scores_1")]; + tensor attention_17_masked_scaled_scores_1 = add(x = attention_17_scaled_scores_1, y = transpose_0)[name = string("attention_17_masked_scaled_scores_1")]; + int32 softmax_35_axis_0 = const()[name = string("softmax_35_axis_0"), val = int32(-2)]; + tensor softmax_35 = softmax(axis = softmax_35_axis_0, x = attention_17_masked_scaled_scores_1)[name = string("softmax_35")]; + bool attention_17_attention_1_transpose_x_0 = const()[name = string("attention_17_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_17_attention_1_transpose_y_0 = const()[name = string("attention_17_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_17_attention_1 = matmul(transpose_x = attention_17_attention_1_transpose_x_0, transpose_y = attention_17_attention_1_transpose_y_0, x = softmax_35, y = attention_17_slice_value_cache_heads_1)[name = string("attention_17_attention_1")]; + int32 attention_17_concat_attention_all_heads_axis_0 = const()[name = string("attention_17_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_17_concat_attention_all_heads_interleave_0 = const()[name = string("attention_17_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_17_concat_attention_all_heads = concat(axis = attention_17_concat_attention_all_heads_axis_0, interleave = attention_17_concat_attention_all_heads_interleave_0, values = (attention_17_attention_0, attention_17_attention_1))[name = string("attention_17_concat_attention_all_heads")]; + tensor attention_17_channels_first_retransposed_perm_0 = const()[name = string("attention_17_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_17_reshaped_shape_0 = const()[name = string("attention_17_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_17_channels_first_retransposed = transpose(perm = attention_17_channels_first_retransposed_perm_0, x = attention_17_concat_attention_all_heads)[name = string("transpose_13")]; + tensor attention_17_reshaped = reshape(shape = attention_17_reshaped_shape_0, x = attention_17_channels_first_retransposed)[name = string("attention_17_reshaped")]; + tensor attention_17_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489487488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490089664))))[name = string("attention_17_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_81 = constexpr_blockwise_shift_scale(data = attention_17_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490118400))))[name = string("constexpr_blockwise_shift_scale_81")]; + tensor attention_17_outproj_strides_0 = const()[name = string("attention_17_outproj_strides_0"), val = tensor([1])]; + string attention_17_outproj_pad_type_0 = const()[name = string("attention_17_outproj_pad_type_0"), val = string("valid")]; + tensor attention_17_outproj_pad_0 = const()[name = string("attention_17_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_17_outproj_dilations_0 = const()[name = string("attention_17_outproj_dilations_0"), val = tensor([1])]; + int32 attention_17_outproj_groups_0 = const()[name = string("attention_17_outproj_groups_0"), val = int32(1)]; + tensor attention_17_outproj = conv(dilations = attention_17_outproj_dilations_0, groups = attention_17_outproj_groups_0, pad = attention_17_outproj_pad_0, pad_type = attention_17_outproj_pad_type_0, strides = attention_17_outproj_strides_0, weight = constexpr_blockwise_shift_scale_81, x = attention_17_reshaped)[name = string("attention_17_outproj")]; + tensor block_17_residual_1 = add(x = block_16_residual_2, y = attention_17_outproj)[name = string("block_17_residual_1")]; + tensor block_17_ffn_rmsnorm_abs = abs(x = block_17_residual_1)[name = string("block_17_ffn_rmsnorm_abs")]; + tensor block_17_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_17_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_17_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_17_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_17_ffn_rmsnorm_maxval = reduce_max(axes = block_17_ffn_rmsnorm_maxval_axes_0, keep_dims = block_17_ffn_rmsnorm_maxval_keep_dims_0, x = block_17_ffn_rmsnorm_abs)[name = string("block_17_ffn_rmsnorm_maxval")]; + fp16 block_17_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_17_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_17_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_17_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_17_ffn_rmsnorm_maxval_clipped = clip(alpha = block_17_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_17_ffn_rmsnorm_maxval_clipped_beta_0, x = block_17_ffn_rmsnorm_maxval)[name = string("block_17_ffn_rmsnorm_maxval_clipped")]; + tensor block_17_ffn_rmsnorm_scaled = real_div(x = block_17_residual_1, y = block_17_ffn_rmsnorm_maxval_clipped)[name = string("block_17_ffn_rmsnorm_scaled")]; + tensor block_17_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_17_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_17_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_17_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_17_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_17_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_17_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_17_ffn_rmsnorm_scaled)[name = string("block_17_ffn_rmsnorm_squared_sum")]; + fp16 block_17_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_17_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_17_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_17_ffn_rmsnorm_rsqrt_epsilon_0, x = block_17_ffn_rmsnorm_squared_sum)[name = string("block_17_ffn_rmsnorm_rsqrt")]; + fp16 block_17_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_17_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_17_ffn_rmsnorm_dim_scaled = mul(x = block_17_ffn_rmsnorm_scaled, y = block_17_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_17_ffn_rmsnorm_dim_scaled")]; + tensor block_17_ffn_rmsnorm_normalized = mul(x = block_17_ffn_rmsnorm_dim_scaled, y = block_17_ffn_rmsnorm_rsqrt)[name = string("block_17_ffn_rmsnorm_normalized")]; + tensor block_17_ffn_rmsnorm_y_0 = const()[name = string("block_17_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490120256)))]; + tensor block_17_ffn_rmsnorm = mul(x = block_17_ffn_rmsnorm_normalized, y = block_17_ffn_rmsnorm_y_0)[name = string("block_17_ffn_rmsnorm")]; + tensor block_17_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490122112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493390784))))[name = string("block_17_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_82 = constexpr_blockwise_shift_scale(data = block_17_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493546496))))[name = string("constexpr_blockwise_shift_scale_82")]; + tensor block_17_ffn_inproj_strides_0 = const()[name = string("block_17_ffn_inproj_strides_0"), val = tensor([1])]; + string block_17_ffn_inproj_pad_type_0 = const()[name = string("block_17_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_17_ffn_inproj_pad_0 = const()[name = string("block_17_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_17_ffn_inproj_dilations_0 = const()[name = string("block_17_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_17_ffn_inproj_groups_0 = const()[name = string("block_17_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_17_ffn_inproj = conv(dilations = block_17_ffn_inproj_dilations_0, groups = block_17_ffn_inproj_groups_0, pad = block_17_ffn_inproj_pad_0, pad_type = block_17_ffn_inproj_pad_type_0, strides = block_17_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_82, x = block_17_ffn_rmsnorm)[name = string("block_17_ffn_inproj")]; + tensor block_17_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493556288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496824960))))[name = string("block_17_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_83 = constexpr_blockwise_shift_scale(data = block_17_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496980672))))[name = string("constexpr_blockwise_shift_scale_83")]; + tensor block_17_ffn_g_strides_0 = const()[name = string("block_17_ffn_g_strides_0"), val = tensor([1])]; + string block_17_ffn_g_pad_type_0 = const()[name = string("block_17_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_17_ffn_g_pad_0 = const()[name = string("block_17_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_17_ffn_g_dilations_0 = const()[name = string("block_17_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_17_ffn_g_groups_0 = const()[name = string("block_17_ffn_g_groups_0"), val = int32(1)]; + tensor block_17_ffn_g = conv(dilations = block_17_ffn_g_dilations_0, groups = block_17_ffn_g_groups_0, pad = block_17_ffn_g_pad_0, pad_type = block_17_ffn_g_pad_type_0, strides = block_17_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_83, x = block_17_ffn_rmsnorm)[name = string("block_17_ffn_g")]; + tensor block_17_ffn_g_activation = silu(x = block_17_ffn_g)[name = string("block_17_ffn_g_activation")]; + tensor block_17_ffn_x_gated = mul(x = block_17_ffn_inproj, y = block_17_ffn_g_activation)[name = string("block_17_ffn_x_gated")]; + tensor block_17_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496990464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500259136))))[name = string("block_17_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_84 = constexpr_blockwise_shift_scale(data = block_17_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500287872))))[name = string("constexpr_blockwise_shift_scale_84")]; + tensor block_17_ffn_outproj_strides_0 = const()[name = string("block_17_ffn_outproj_strides_0"), val = tensor([1])]; + string block_17_ffn_outproj_pad_type_0 = const()[name = string("block_17_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_17_ffn_outproj_pad_0 = const()[name = string("block_17_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_17_ffn_outproj_dilations_0 = const()[name = string("block_17_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_17_ffn_outproj_groups_0 = const()[name = string("block_17_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_17_ffn_outproj = conv(dilations = block_17_ffn_outproj_dilations_0, groups = block_17_ffn_outproj_groups_0, pad = block_17_ffn_outproj_pad_0, pad_type = block_17_ffn_outproj_pad_type_0, strides = block_17_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_84, x = block_17_ffn_x_gated)[name = string("block_17_ffn_outproj")]; + tensor block_17_residual_2 = add(x = block_17_ffn_outproj, y = block_17_residual_1)[name = string("block_17_residual_2")]; + tensor block_18_attention_rmsnorm_abs = abs(x = block_17_residual_2)[name = string("block_18_attention_rmsnorm_abs")]; + tensor block_18_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_18_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_18_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_18_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_18_attention_rmsnorm_maxval = reduce_max(axes = block_18_attention_rmsnorm_maxval_axes_0, keep_dims = block_18_attention_rmsnorm_maxval_keep_dims_0, x = block_18_attention_rmsnorm_abs)[name = string("block_18_attention_rmsnorm_maxval")]; + fp16 block_18_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_18_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_18_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_18_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_18_attention_rmsnorm_maxval_clipped = clip(alpha = block_18_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_18_attention_rmsnorm_maxval_clipped_beta_0, x = block_18_attention_rmsnorm_maxval)[name = string("block_18_attention_rmsnorm_maxval_clipped")]; + tensor block_18_attention_rmsnorm_scaled = real_div(x = block_17_residual_2, y = block_18_attention_rmsnorm_maxval_clipped)[name = string("block_18_attention_rmsnorm_scaled")]; + tensor block_18_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_18_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_18_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_18_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_18_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_18_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_18_attention_rmsnorm_squared_sum_keep_dims_0, x = block_18_attention_rmsnorm_scaled)[name = string("block_18_attention_rmsnorm_squared_sum")]; + fp16 block_18_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_18_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_18_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_18_attention_rmsnorm_rsqrt_epsilon_0, x = block_18_attention_rmsnorm_squared_sum)[name = string("block_18_attention_rmsnorm_rsqrt")]; + fp16 block_18_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_18_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_18_attention_rmsnorm_dim_scaled = mul(x = block_18_attention_rmsnorm_scaled, y = block_18_attention_rmsnorm_dim_scaled_y_0)[name = string("block_18_attention_rmsnorm_dim_scaled")]; + tensor block_18_attention_rmsnorm_normalized = mul(x = block_18_attention_rmsnorm_dim_scaled, y = block_18_attention_rmsnorm_rsqrt)[name = string("block_18_attention_rmsnorm_normalized")]; + tensor block_18_attention_rmsnorm_y_0 = const()[name = string("block_18_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500289728)))]; + tensor block_18_attention_rmsnorm = mul(x = block_18_attention_rmsnorm_normalized, y = block_18_attention_rmsnorm_y_0)[name = string("block_18_attention_rmsnorm")]; + tensor attention_18_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500291584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501065792))))[name = string("attention_18_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_85 = constexpr_blockwise_shift_scale(data = attention_18_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501102720))))[name = string("constexpr_blockwise_shift_scale_85")]; + tensor attention_18_qkvproj_bias_0 = const()[name = string("attention_18_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501105088)))]; + tensor attention_18_qkvproj_strides_0 = const()[name = string("attention_18_qkvproj_strides_0"), val = tensor([1])]; + string attention_18_qkvproj_pad_type_0 = const()[name = string("attention_18_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_18_qkvproj_pad_0 = const()[name = string("attention_18_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_18_qkvproj_dilations_0 = const()[name = string("attention_18_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_18_qkvproj_groups_0 = const()[name = string("attention_18_qkvproj_groups_0"), val = int32(1)]; + tensor attention_18_qkvproj = conv(bias = attention_18_qkvproj_bias_0, dilations = attention_18_qkvproj_dilations_0, groups = attention_18_qkvproj_groups_0, pad = attention_18_qkvproj_pad_0, pad_type = attention_18_qkvproj_pad_type_0, strides = attention_18_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_85, x = block_18_attention_rmsnorm)[name = string("attention_18_qkvproj")]; + tensor attention_18_head_reshape_shape_0 = const()[name = string("attention_18_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_18_head_reshape = reshape(shape = attention_18_head_reshape_shape_0, x = attention_18_qkvproj)[name = string("attention_18_head_reshape")]; + tensor attention_18_head_transpose_perm_0 = const()[name = string("attention_18_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_18_split_qkv_heads_axis_0 = const()[name = string("attention_18_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_18_split_qkv_heads_split_sizes_0 = const()[name = string("attention_18_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_18_head_transpose = transpose(perm = attention_18_head_transpose_perm_0, x = attention_18_head_reshape)[name = string("transpose_12")]; + tensor attention_18_split_qkv_heads_0, tensor attention_18_split_qkv_heads_1, tensor attention_18_split_qkv_heads_2 = split(axis = attention_18_split_qkv_heads_axis_0, split_sizes = attention_18_split_qkv_heads_split_sizes_0, x = attention_18_head_transpose)[name = string("attention_18_split_qkv_heads")]; + tensor attention_18_q_rope_lhs_mult = mul(x = attention_18_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_18_q_rope_lhs_mult")]; + int32 attention_18_q_rotate_half_split_num_splits_0 = const()[name = string("attention_18_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_18_q_rotate_half_split_axis_0 = const()[name = string("attention_18_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_18_q_rotate_half_split_0, tensor attention_18_q_rotate_half_split_1 = split(axis = attention_18_q_rotate_half_split_axis_0, num_splits = attention_18_q_rotate_half_split_num_splits_0, x = attention_18_split_qkv_heads_0)[name = string("attention_18_q_rotate_half_split")]; + fp16 attention_18_q_rotate_half_neg_y_0 = const()[name = string("attention_18_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_18_q_rotate_half_neg = mul(x = attention_18_q_rotate_half_split_1, y = attention_18_q_rotate_half_neg_y_0)[name = string("attention_18_q_rotate_half_neg")]; + int32 attention_18_q_rotate_half_concat_axis_0 = const()[name = string("attention_18_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_18_q_rotate_half_concat_interleave_0 = const()[name = string("attention_18_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_18_q_rotate_half_concat = concat(axis = attention_18_q_rotate_half_concat_axis_0, interleave = attention_18_q_rotate_half_concat_interleave_0, values = (attention_18_q_rotate_half_neg, attention_18_q_rotate_half_split_0))[name = string("attention_18_q_rotate_half_concat")]; + tensor attention_18_q_rope_rhs_mult = mul(x = attention_18_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_18_q_rope_rhs_mult")]; + tensor attention_18_q_rope = add(x = attention_18_q_rope_lhs_mult, y = attention_18_q_rope_rhs_mult)[name = string("attention_18_q_rope")]; + tensor attention_18_k_rope_lhs_mult = mul(x = attention_18_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_18_k_rope_lhs_mult")]; + int32 attention_18_k_rotate_half_split_num_splits_0 = const()[name = string("attention_18_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_18_k_rotate_half_split_axis_0 = const()[name = string("attention_18_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_18_k_rotate_half_split_0, tensor attention_18_k_rotate_half_split_1 = split(axis = attention_18_k_rotate_half_split_axis_0, num_splits = attention_18_k_rotate_half_split_num_splits_0, x = attention_18_split_qkv_heads_1)[name = string("attention_18_k_rotate_half_split")]; + fp16 attention_18_k_rotate_half_neg_y_0 = const()[name = string("attention_18_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_18_k_rotate_half_neg = mul(x = attention_18_k_rotate_half_split_1, y = attention_18_k_rotate_half_neg_y_0)[name = string("attention_18_k_rotate_half_neg")]; + int32 attention_18_k_rotate_half_concat_axis_0 = const()[name = string("attention_18_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_18_k_rotate_half_concat_interleave_0 = const()[name = string("attention_18_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_18_k_rotate_half_concat = concat(axis = attention_18_k_rotate_half_concat_axis_0, interleave = attention_18_k_rotate_half_concat_interleave_0, values = (attention_18_k_rotate_half_neg, attention_18_k_rotate_half_split_0))[name = string("attention_18_k_rotate_half_concat")]; + tensor attention_18_k_rope_rhs_mult = mul(x = attention_18_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_18_k_rope_rhs_mult")]; + tensor attention_18_k_rope = add(x = attention_18_k_rope_lhs_mult, y = attention_18_k_rope_rhs_mult)[name = string("attention_18_k_rope")]; + int32 attention_18_q_splits_axis_0 = const()[name = string("attention_18_q_splits_axis_0"), val = int32(1)]; + int32 attention_18_q_splits_num_splits_0 = const()[name = string("attention_18_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_18_q_splits_0, tensor attention_18_q_splits_1 = split(axis = attention_18_q_splits_axis_0, num_splits = attention_18_q_splits_num_splits_0, x = attention_18_q_rope)[name = string("attention_18_q_splits")]; + tensor attention_18_update_begin_0_values0_0 = const()[name = string("attention_18_update_begin_0_values0_0"), val = tensor([18])]; + tensor attention_18_update_begin_0_values1_0 = const()[name = string("attention_18_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_18_update_begin_0_values3_0 = const()[name = string("attention_18_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_18_update_begin_0_axis_0 = const()[name = string("attention_18_update_begin_0_axis_0"), val = int32(0)]; + bool attention_18_update_begin_0_interleave_0 = const()[name = string("attention_18_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_18_update_begin_0 = concat(axis = attention_18_update_begin_0_axis_0, interleave = attention_18_update_begin_0_interleave_0, values = (attention_18_update_begin_0_values0_0, attention_18_update_begin_0_values1_0, query_pos1, attention_18_update_begin_0_values3_0))[name = string("attention_18_update_begin_0")]; + tensor attention_18_update_end_0_values0_0 = const()[name = string("attention_18_update_end_0_values0_0"), val = tensor([19])]; + tensor attention_18_update_end_0_values1_0 = const()[name = string("attention_18_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_18_update_end_0_values3_0 = const()[name = string("attention_18_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_18_update_end_0_axis_0 = const()[name = string("attention_18_update_end_0_axis_0"), val = int32(0)]; + bool attention_18_update_end_0_interleave_0 = const()[name = string("attention_18_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_18_update_end_0 = concat(axis = attention_18_update_end_0_axis_0, interleave = attention_18_update_end_0_interleave_0, values = (attention_18_update_end_0_values0_0, attention_18_update_end_0_values1_0, end_pos_0, attention_18_update_end_0_values3_0))[name = string("attention_18_update_end_0")]; + tensor attention_18_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_18_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_18_updated_key_cache_0 = slice_update(begin = attention_18_update_begin_0, end = attention_18_update_end_0, squeeze_mask = attention_18_updated_key_cache_0_squeeze_mask_0, update = attention_18_k_rope, x = coreml_update_state_34)[name = string("attention_18_updated_key_cache_0")]; + write_state(data = attention_18_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_84_write_state")]; + tensor coreml_update_state_36 = read_state(input = key_cache_state)[name = string("coreml_update_state_84")]; + tensor attention_18_key_cache_begin_0 = const()[name = string("attention_18_key_cache_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor attention_18_key_cache_end_0 = const()[name = string("attention_18_key_cache_end_0"), val = tensor([19, 2, 512, 64])]; + tensor attention_18_key_cache_squeeze_mask_0 = const()[name = string("attention_18_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_18_key_cache = slice_by_index(begin = attention_18_key_cache_begin_0, end = attention_18_key_cache_end_0, squeeze_mask = attention_18_key_cache_squeeze_mask_0, x = coreml_update_state_36)[name = string("attention_18_key_cache")]; + int32 attention_18_key_cache_head_axis_0 = const()[name = string("attention_18_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_18_key_cache_head_num_splits_0 = const()[name = string("attention_18_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_18_key_cache_head_0, tensor attention_18_key_cache_head_1 = split(axis = attention_18_key_cache_head_axis_0, num_splits = attention_18_key_cache_head_num_splits_0, x = attention_18_key_cache)[name = string("attention_18_key_cache_head")]; + tensor attention_18_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_18_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_18_updated_value_cache_0 = slice_update(begin = attention_18_update_begin_0, end = attention_18_update_end_0, squeeze_mask = attention_18_updated_value_cache_0_squeeze_mask_0, update = attention_18_split_qkv_heads_2, x = coreml_update_state_35)[name = string("attention_18_updated_value_cache_0")]; + write_state(data = attention_18_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_85_write_state")]; + tensor coreml_update_state_37 = read_state(input = value_cache_state)[name = string("coreml_update_state_85")]; + tensor attention_18_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_18_slice_current_layer_value_cache_begin_0"), val = tensor([18, 0, 0, 0])]; + tensor attention_18_slice_current_layer_value_cache_end_0 = const()[name = string("attention_18_slice_current_layer_value_cache_end_0"), val = tensor([19, 2, 512, 64])]; + tensor attention_18_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_18_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_18_slice_current_layer_value_cache = slice_by_index(begin = attention_18_slice_current_layer_value_cache_begin_0, end = attention_18_slice_current_layer_value_cache_end_0, squeeze_mask = attention_18_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_37)[name = string("attention_18_slice_current_layer_value_cache")]; + int32 attention_18_slice_value_cache_heads_axis_0 = const()[name = string("attention_18_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_18_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_18_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_18_slice_value_cache_heads_0, tensor attention_18_slice_value_cache_heads_1 = split(axis = attention_18_slice_value_cache_heads_axis_0, num_splits = attention_18_slice_value_cache_heads_num_splits_0, x = attention_18_slice_current_layer_value_cache)[name = string("attention_18_slice_value_cache_heads")]; + bool attention_18_scores_0_transpose_y_0 = const()[name = string("attention_18_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_18_scores_0_transpose_x_0 = const()[name = string("attention_18_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_18_scores_0 = matmul(transpose_x = attention_18_scores_0_transpose_x_0, transpose_y = attention_18_scores_0_transpose_y_0, x = attention_18_key_cache_head_0, y = attention_18_q_splits_0)[name = string("attention_18_scores_0")]; + fp16 attention_18_scaled_scores_0_y_0 = const()[name = string("attention_18_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_18_scaled_scores_0 = mul(x = attention_18_scores_0, y = attention_18_scaled_scores_0_y_0)[name = string("attention_18_scaled_scores_0")]; + tensor attention_18_masked_scaled_scores_0 = add(x = attention_18_scaled_scores_0, y = transpose_0)[name = string("attention_18_masked_scaled_scores_0")]; + int32 softmax_36_axis_0 = const()[name = string("softmax_36_axis_0"), val = int32(-2)]; + tensor softmax_36 = softmax(axis = softmax_36_axis_0, x = attention_18_masked_scaled_scores_0)[name = string("softmax_36")]; + bool attention_18_attention_0_transpose_x_0 = const()[name = string("attention_18_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_18_attention_0_transpose_y_0 = const()[name = string("attention_18_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_18_attention_0 = matmul(transpose_x = attention_18_attention_0_transpose_x_0, transpose_y = attention_18_attention_0_transpose_y_0, x = softmax_36, y = attention_18_slice_value_cache_heads_0)[name = string("attention_18_attention_0")]; + bool attention_18_scores_1_transpose_y_0 = const()[name = string("attention_18_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_18_scores_1_transpose_x_0 = const()[name = string("attention_18_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_18_scores_1 = matmul(transpose_x = attention_18_scores_1_transpose_x_0, transpose_y = attention_18_scores_1_transpose_y_0, x = attention_18_key_cache_head_1, y = attention_18_q_splits_1)[name = string("attention_18_scores_1")]; + fp16 attention_18_scaled_scores_1_y_0 = const()[name = string("attention_18_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_18_scaled_scores_1 = mul(x = attention_18_scores_1, y = attention_18_scaled_scores_1_y_0)[name = string("attention_18_scaled_scores_1")]; + tensor attention_18_masked_scaled_scores_1 = add(x = attention_18_scaled_scores_1, y = transpose_0)[name = string("attention_18_masked_scaled_scores_1")]; + int32 softmax_37_axis_0 = const()[name = string("softmax_37_axis_0"), val = int32(-2)]; + tensor softmax_37 = softmax(axis = softmax_37_axis_0, x = attention_18_masked_scaled_scores_1)[name = string("softmax_37")]; + bool attention_18_attention_1_transpose_x_0 = const()[name = string("attention_18_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_18_attention_1_transpose_y_0 = const()[name = string("attention_18_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_18_attention_1 = matmul(transpose_x = attention_18_attention_1_transpose_x_0, transpose_y = attention_18_attention_1_transpose_y_0, x = softmax_37, y = attention_18_slice_value_cache_heads_1)[name = string("attention_18_attention_1")]; + int32 attention_18_concat_attention_all_heads_axis_0 = const()[name = string("attention_18_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_18_concat_attention_all_heads_interleave_0 = const()[name = string("attention_18_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_18_concat_attention_all_heads = concat(axis = attention_18_concat_attention_all_heads_axis_0, interleave = attention_18_concat_attention_all_heads_interleave_0, values = (attention_18_attention_0, attention_18_attention_1))[name = string("attention_18_concat_attention_all_heads")]; + tensor attention_18_channels_first_retransposed_perm_0 = const()[name = string("attention_18_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_18_reshaped_shape_0 = const()[name = string("attention_18_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_18_channels_first_retransposed = transpose(perm = attention_18_channels_first_retransposed_perm_0, x = attention_18_concat_attention_all_heads)[name = string("transpose_11")]; + tensor attention_18_reshaped = reshape(shape = attention_18_reshaped_shape_0, x = attention_18_channels_first_retransposed)[name = string("attention_18_reshaped")]; + tensor attention_18_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501107456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501709632))))[name = string("attention_18_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_86 = constexpr_blockwise_shift_scale(data = attention_18_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501738368))))[name = string("constexpr_blockwise_shift_scale_86")]; + tensor attention_18_outproj_strides_0 = const()[name = string("attention_18_outproj_strides_0"), val = tensor([1])]; + string attention_18_outproj_pad_type_0 = const()[name = string("attention_18_outproj_pad_type_0"), val = string("valid")]; + tensor attention_18_outproj_pad_0 = const()[name = string("attention_18_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_18_outproj_dilations_0 = const()[name = string("attention_18_outproj_dilations_0"), val = tensor([1])]; + int32 attention_18_outproj_groups_0 = const()[name = string("attention_18_outproj_groups_0"), val = int32(1)]; + tensor attention_18_outproj = conv(dilations = attention_18_outproj_dilations_0, groups = attention_18_outproj_groups_0, pad = attention_18_outproj_pad_0, pad_type = attention_18_outproj_pad_type_0, strides = attention_18_outproj_strides_0, weight = constexpr_blockwise_shift_scale_86, x = attention_18_reshaped)[name = string("attention_18_outproj")]; + tensor block_18_residual_1 = add(x = block_17_residual_2, y = attention_18_outproj)[name = string("block_18_residual_1")]; + tensor block_18_ffn_rmsnorm_abs = abs(x = block_18_residual_1)[name = string("block_18_ffn_rmsnorm_abs")]; + tensor block_18_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_18_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_18_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_18_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_18_ffn_rmsnorm_maxval = reduce_max(axes = block_18_ffn_rmsnorm_maxval_axes_0, keep_dims = block_18_ffn_rmsnorm_maxval_keep_dims_0, x = block_18_ffn_rmsnorm_abs)[name = string("block_18_ffn_rmsnorm_maxval")]; + fp16 block_18_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_18_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_18_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_18_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_18_ffn_rmsnorm_maxval_clipped = clip(alpha = block_18_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_18_ffn_rmsnorm_maxval_clipped_beta_0, x = block_18_ffn_rmsnorm_maxval)[name = string("block_18_ffn_rmsnorm_maxval_clipped")]; + tensor block_18_ffn_rmsnorm_scaled = real_div(x = block_18_residual_1, y = block_18_ffn_rmsnorm_maxval_clipped)[name = string("block_18_ffn_rmsnorm_scaled")]; + tensor block_18_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_18_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_18_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_18_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_18_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_18_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_18_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_18_ffn_rmsnorm_scaled)[name = string("block_18_ffn_rmsnorm_squared_sum")]; + fp16 block_18_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_18_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_18_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_18_ffn_rmsnorm_rsqrt_epsilon_0, x = block_18_ffn_rmsnorm_squared_sum)[name = string("block_18_ffn_rmsnorm_rsqrt")]; + fp16 block_18_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_18_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_18_ffn_rmsnorm_dim_scaled = mul(x = block_18_ffn_rmsnorm_scaled, y = block_18_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_18_ffn_rmsnorm_dim_scaled")]; + tensor block_18_ffn_rmsnorm_normalized = mul(x = block_18_ffn_rmsnorm_dim_scaled, y = block_18_ffn_rmsnorm_rsqrt)[name = string("block_18_ffn_rmsnorm_normalized")]; + tensor block_18_ffn_rmsnorm_y_0 = const()[name = string("block_18_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501740224)))]; + tensor block_18_ffn_rmsnorm = mul(x = block_18_ffn_rmsnorm_normalized, y = block_18_ffn_rmsnorm_y_0)[name = string("block_18_ffn_rmsnorm")]; + tensor block_18_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501742080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505010752))))[name = string("block_18_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_87 = constexpr_blockwise_shift_scale(data = block_18_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505166464))))[name = string("constexpr_blockwise_shift_scale_87")]; + tensor block_18_ffn_inproj_strides_0 = const()[name = string("block_18_ffn_inproj_strides_0"), val = tensor([1])]; + string block_18_ffn_inproj_pad_type_0 = const()[name = string("block_18_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_18_ffn_inproj_pad_0 = const()[name = string("block_18_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_18_ffn_inproj_dilations_0 = const()[name = string("block_18_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_18_ffn_inproj_groups_0 = const()[name = string("block_18_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_18_ffn_inproj = conv(dilations = block_18_ffn_inproj_dilations_0, groups = block_18_ffn_inproj_groups_0, pad = block_18_ffn_inproj_pad_0, pad_type = block_18_ffn_inproj_pad_type_0, strides = block_18_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_87, x = block_18_ffn_rmsnorm)[name = string("block_18_ffn_inproj")]; + tensor block_18_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505176256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508444928))))[name = string("block_18_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_88 = constexpr_blockwise_shift_scale(data = block_18_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508600640))))[name = string("constexpr_blockwise_shift_scale_88")]; + tensor block_18_ffn_g_strides_0 = const()[name = string("block_18_ffn_g_strides_0"), val = tensor([1])]; + string block_18_ffn_g_pad_type_0 = const()[name = string("block_18_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_18_ffn_g_pad_0 = const()[name = string("block_18_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_18_ffn_g_dilations_0 = const()[name = string("block_18_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_18_ffn_g_groups_0 = const()[name = string("block_18_ffn_g_groups_0"), val = int32(1)]; + tensor block_18_ffn_g = conv(dilations = block_18_ffn_g_dilations_0, groups = block_18_ffn_g_groups_0, pad = block_18_ffn_g_pad_0, pad_type = block_18_ffn_g_pad_type_0, strides = block_18_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_88, x = block_18_ffn_rmsnorm)[name = string("block_18_ffn_g")]; + tensor block_18_ffn_g_activation = silu(x = block_18_ffn_g)[name = string("block_18_ffn_g_activation")]; + tensor block_18_ffn_x_gated = mul(x = block_18_ffn_inproj, y = block_18_ffn_g_activation)[name = string("block_18_ffn_x_gated")]; + tensor block_18_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508610432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511879104))))[name = string("block_18_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_89 = constexpr_blockwise_shift_scale(data = block_18_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511907840))))[name = string("constexpr_blockwise_shift_scale_89")]; + tensor block_18_ffn_outproj_strides_0 = const()[name = string("block_18_ffn_outproj_strides_0"), val = tensor([1])]; + string block_18_ffn_outproj_pad_type_0 = const()[name = string("block_18_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_18_ffn_outproj_pad_0 = const()[name = string("block_18_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_18_ffn_outproj_dilations_0 = const()[name = string("block_18_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_18_ffn_outproj_groups_0 = const()[name = string("block_18_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_18_ffn_outproj = conv(dilations = block_18_ffn_outproj_dilations_0, groups = block_18_ffn_outproj_groups_0, pad = block_18_ffn_outproj_pad_0, pad_type = block_18_ffn_outproj_pad_type_0, strides = block_18_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_89, x = block_18_ffn_x_gated)[name = string("block_18_ffn_outproj")]; + tensor block_18_residual_2 = add(x = block_18_ffn_outproj, y = block_18_residual_1)[name = string("block_18_residual_2")]; + tensor block_19_attention_rmsnorm_abs = abs(x = block_18_residual_2)[name = string("block_19_attention_rmsnorm_abs")]; + tensor block_19_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_19_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_19_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_19_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_19_attention_rmsnorm_maxval = reduce_max(axes = block_19_attention_rmsnorm_maxval_axes_0, keep_dims = block_19_attention_rmsnorm_maxval_keep_dims_0, x = block_19_attention_rmsnorm_abs)[name = string("block_19_attention_rmsnorm_maxval")]; + fp16 block_19_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_19_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_19_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_19_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_19_attention_rmsnorm_maxval_clipped = clip(alpha = block_19_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_19_attention_rmsnorm_maxval_clipped_beta_0, x = block_19_attention_rmsnorm_maxval)[name = string("block_19_attention_rmsnorm_maxval_clipped")]; + tensor block_19_attention_rmsnorm_scaled = real_div(x = block_18_residual_2, y = block_19_attention_rmsnorm_maxval_clipped)[name = string("block_19_attention_rmsnorm_scaled")]; + tensor block_19_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_19_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_19_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_19_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_19_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_19_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_19_attention_rmsnorm_squared_sum_keep_dims_0, x = block_19_attention_rmsnorm_scaled)[name = string("block_19_attention_rmsnorm_squared_sum")]; + fp16 block_19_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_19_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_19_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_19_attention_rmsnorm_rsqrt_epsilon_0, x = block_19_attention_rmsnorm_squared_sum)[name = string("block_19_attention_rmsnorm_rsqrt")]; + fp16 block_19_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_19_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_19_attention_rmsnorm_dim_scaled = mul(x = block_19_attention_rmsnorm_scaled, y = block_19_attention_rmsnorm_dim_scaled_y_0)[name = string("block_19_attention_rmsnorm_dim_scaled")]; + tensor block_19_attention_rmsnorm_normalized = mul(x = block_19_attention_rmsnorm_dim_scaled, y = block_19_attention_rmsnorm_rsqrt)[name = string("block_19_attention_rmsnorm_normalized")]; + tensor block_19_attention_rmsnorm_y_0 = const()[name = string("block_19_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511909696)))]; + tensor block_19_attention_rmsnorm = mul(x = block_19_attention_rmsnorm_normalized, y = block_19_attention_rmsnorm_y_0)[name = string("block_19_attention_rmsnorm")]; + tensor attention_19_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511911552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512685760))))[name = string("attention_19_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_90 = constexpr_blockwise_shift_scale(data = attention_19_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512722688))))[name = string("constexpr_blockwise_shift_scale_90")]; + tensor attention_19_qkvproj_bias_0 = const()[name = string("attention_19_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512725056)))]; + tensor attention_19_qkvproj_strides_0 = const()[name = string("attention_19_qkvproj_strides_0"), val = tensor([1])]; + string attention_19_qkvproj_pad_type_0 = const()[name = string("attention_19_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_19_qkvproj_pad_0 = const()[name = string("attention_19_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_19_qkvproj_dilations_0 = const()[name = string("attention_19_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_19_qkvproj_groups_0 = const()[name = string("attention_19_qkvproj_groups_0"), val = int32(1)]; + tensor attention_19_qkvproj = conv(bias = attention_19_qkvproj_bias_0, dilations = attention_19_qkvproj_dilations_0, groups = attention_19_qkvproj_groups_0, pad = attention_19_qkvproj_pad_0, pad_type = attention_19_qkvproj_pad_type_0, strides = attention_19_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_90, x = block_19_attention_rmsnorm)[name = string("attention_19_qkvproj")]; + tensor attention_19_head_reshape_shape_0 = const()[name = string("attention_19_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_19_head_reshape = reshape(shape = attention_19_head_reshape_shape_0, x = attention_19_qkvproj)[name = string("attention_19_head_reshape")]; + tensor attention_19_head_transpose_perm_0 = const()[name = string("attention_19_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_19_split_qkv_heads_axis_0 = const()[name = string("attention_19_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_19_split_qkv_heads_split_sizes_0 = const()[name = string("attention_19_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_19_head_transpose = transpose(perm = attention_19_head_transpose_perm_0, x = attention_19_head_reshape)[name = string("transpose_10")]; + tensor attention_19_split_qkv_heads_0, tensor attention_19_split_qkv_heads_1, tensor attention_19_split_qkv_heads_2 = split(axis = attention_19_split_qkv_heads_axis_0, split_sizes = attention_19_split_qkv_heads_split_sizes_0, x = attention_19_head_transpose)[name = string("attention_19_split_qkv_heads")]; + tensor attention_19_q_rope_lhs_mult = mul(x = attention_19_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_19_q_rope_lhs_mult")]; + int32 attention_19_q_rotate_half_split_num_splits_0 = const()[name = string("attention_19_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_19_q_rotate_half_split_axis_0 = const()[name = string("attention_19_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_19_q_rotate_half_split_0, tensor attention_19_q_rotate_half_split_1 = split(axis = attention_19_q_rotate_half_split_axis_0, num_splits = attention_19_q_rotate_half_split_num_splits_0, x = attention_19_split_qkv_heads_0)[name = string("attention_19_q_rotate_half_split")]; + fp16 attention_19_q_rotate_half_neg_y_0 = const()[name = string("attention_19_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_19_q_rotate_half_neg = mul(x = attention_19_q_rotate_half_split_1, y = attention_19_q_rotate_half_neg_y_0)[name = string("attention_19_q_rotate_half_neg")]; + int32 attention_19_q_rotate_half_concat_axis_0 = const()[name = string("attention_19_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_19_q_rotate_half_concat_interleave_0 = const()[name = string("attention_19_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_19_q_rotate_half_concat = concat(axis = attention_19_q_rotate_half_concat_axis_0, interleave = attention_19_q_rotate_half_concat_interleave_0, values = (attention_19_q_rotate_half_neg, attention_19_q_rotate_half_split_0))[name = string("attention_19_q_rotate_half_concat")]; + tensor attention_19_q_rope_rhs_mult = mul(x = attention_19_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_19_q_rope_rhs_mult")]; + tensor attention_19_q_rope = add(x = attention_19_q_rope_lhs_mult, y = attention_19_q_rope_rhs_mult)[name = string("attention_19_q_rope")]; + tensor attention_19_k_rope_lhs_mult = mul(x = attention_19_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_19_k_rope_lhs_mult")]; + int32 attention_19_k_rotate_half_split_num_splits_0 = const()[name = string("attention_19_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_19_k_rotate_half_split_axis_0 = const()[name = string("attention_19_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_19_k_rotate_half_split_0, tensor attention_19_k_rotate_half_split_1 = split(axis = attention_19_k_rotate_half_split_axis_0, num_splits = attention_19_k_rotate_half_split_num_splits_0, x = attention_19_split_qkv_heads_1)[name = string("attention_19_k_rotate_half_split")]; + fp16 attention_19_k_rotate_half_neg_y_0 = const()[name = string("attention_19_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_19_k_rotate_half_neg = mul(x = attention_19_k_rotate_half_split_1, y = attention_19_k_rotate_half_neg_y_0)[name = string("attention_19_k_rotate_half_neg")]; + int32 attention_19_k_rotate_half_concat_axis_0 = const()[name = string("attention_19_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_19_k_rotate_half_concat_interleave_0 = const()[name = string("attention_19_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_19_k_rotate_half_concat = concat(axis = attention_19_k_rotate_half_concat_axis_0, interleave = attention_19_k_rotate_half_concat_interleave_0, values = (attention_19_k_rotate_half_neg, attention_19_k_rotate_half_split_0))[name = string("attention_19_k_rotate_half_concat")]; + tensor attention_19_k_rope_rhs_mult = mul(x = attention_19_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_19_k_rope_rhs_mult")]; + tensor attention_19_k_rope = add(x = attention_19_k_rope_lhs_mult, y = attention_19_k_rope_rhs_mult)[name = string("attention_19_k_rope")]; + int32 attention_19_q_splits_axis_0 = const()[name = string("attention_19_q_splits_axis_0"), val = int32(1)]; + int32 attention_19_q_splits_num_splits_0 = const()[name = string("attention_19_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_19_q_splits_0, tensor attention_19_q_splits_1 = split(axis = attention_19_q_splits_axis_0, num_splits = attention_19_q_splits_num_splits_0, x = attention_19_q_rope)[name = string("attention_19_q_splits")]; + tensor attention_19_update_begin_0_values0_0 = const()[name = string("attention_19_update_begin_0_values0_0"), val = tensor([19])]; + tensor attention_19_update_begin_0_values1_0 = const()[name = string("attention_19_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_19_update_begin_0_values3_0 = const()[name = string("attention_19_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_19_update_begin_0_axis_0 = const()[name = string("attention_19_update_begin_0_axis_0"), val = int32(0)]; + bool attention_19_update_begin_0_interleave_0 = const()[name = string("attention_19_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_19_update_begin_0 = concat(axis = attention_19_update_begin_0_axis_0, interleave = attention_19_update_begin_0_interleave_0, values = (attention_19_update_begin_0_values0_0, attention_19_update_begin_0_values1_0, query_pos1, attention_19_update_begin_0_values3_0))[name = string("attention_19_update_begin_0")]; + tensor attention_19_update_end_0_values0_0 = const()[name = string("attention_19_update_end_0_values0_0"), val = tensor([20])]; + tensor attention_19_update_end_0_values1_0 = const()[name = string("attention_19_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_19_update_end_0_values3_0 = const()[name = string("attention_19_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_19_update_end_0_axis_0 = const()[name = string("attention_19_update_end_0_axis_0"), val = int32(0)]; + bool attention_19_update_end_0_interleave_0 = const()[name = string("attention_19_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_19_update_end_0 = concat(axis = attention_19_update_end_0_axis_0, interleave = attention_19_update_end_0_interleave_0, values = (attention_19_update_end_0_values0_0, attention_19_update_end_0_values1_0, end_pos_0, attention_19_update_end_0_values3_0))[name = string("attention_19_update_end_0")]; + tensor attention_19_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_19_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_19_updated_key_cache_0 = slice_update(begin = attention_19_update_begin_0, end = attention_19_update_end_0, squeeze_mask = attention_19_updated_key_cache_0_squeeze_mask_0, update = attention_19_k_rope, x = coreml_update_state_36)[name = string("attention_19_updated_key_cache_0")]; + write_state(data = attention_19_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_86_write_state")]; + tensor coreml_update_state_38 = read_state(input = key_cache_state)[name = string("coreml_update_state_86")]; + tensor attention_19_key_cache_begin_0 = const()[name = string("attention_19_key_cache_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor attention_19_key_cache_end_0 = const()[name = string("attention_19_key_cache_end_0"), val = tensor([20, 2, 512, 64])]; + tensor attention_19_key_cache_squeeze_mask_0 = const()[name = string("attention_19_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_19_key_cache = slice_by_index(begin = attention_19_key_cache_begin_0, end = attention_19_key_cache_end_0, squeeze_mask = attention_19_key_cache_squeeze_mask_0, x = coreml_update_state_38)[name = string("attention_19_key_cache")]; + int32 attention_19_key_cache_head_axis_0 = const()[name = string("attention_19_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_19_key_cache_head_num_splits_0 = const()[name = string("attention_19_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_19_key_cache_head_0, tensor attention_19_key_cache_head_1 = split(axis = attention_19_key_cache_head_axis_0, num_splits = attention_19_key_cache_head_num_splits_0, x = attention_19_key_cache)[name = string("attention_19_key_cache_head")]; + tensor attention_19_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_19_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_19_updated_value_cache_0 = slice_update(begin = attention_19_update_begin_0, end = attention_19_update_end_0, squeeze_mask = attention_19_updated_value_cache_0_squeeze_mask_0, update = attention_19_split_qkv_heads_2, x = coreml_update_state_37)[name = string("attention_19_updated_value_cache_0")]; + write_state(data = attention_19_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_87_write_state")]; + tensor coreml_update_state_39 = read_state(input = value_cache_state)[name = string("coreml_update_state_87")]; + tensor attention_19_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_19_slice_current_layer_value_cache_begin_0"), val = tensor([19, 0, 0, 0])]; + tensor attention_19_slice_current_layer_value_cache_end_0 = const()[name = string("attention_19_slice_current_layer_value_cache_end_0"), val = tensor([20, 2, 512, 64])]; + tensor attention_19_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_19_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_19_slice_current_layer_value_cache = slice_by_index(begin = attention_19_slice_current_layer_value_cache_begin_0, end = attention_19_slice_current_layer_value_cache_end_0, squeeze_mask = attention_19_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_39)[name = string("attention_19_slice_current_layer_value_cache")]; + int32 attention_19_slice_value_cache_heads_axis_0 = const()[name = string("attention_19_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_19_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_19_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_19_slice_value_cache_heads_0, tensor attention_19_slice_value_cache_heads_1 = split(axis = attention_19_slice_value_cache_heads_axis_0, num_splits = attention_19_slice_value_cache_heads_num_splits_0, x = attention_19_slice_current_layer_value_cache)[name = string("attention_19_slice_value_cache_heads")]; + bool attention_19_scores_0_transpose_y_0 = const()[name = string("attention_19_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_19_scores_0_transpose_x_0 = const()[name = string("attention_19_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_19_scores_0 = matmul(transpose_x = attention_19_scores_0_transpose_x_0, transpose_y = attention_19_scores_0_transpose_y_0, x = attention_19_key_cache_head_0, y = attention_19_q_splits_0)[name = string("attention_19_scores_0")]; + fp16 attention_19_scaled_scores_0_y_0 = const()[name = string("attention_19_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_19_scaled_scores_0 = mul(x = attention_19_scores_0, y = attention_19_scaled_scores_0_y_0)[name = string("attention_19_scaled_scores_0")]; + tensor attention_19_masked_scaled_scores_0 = add(x = attention_19_scaled_scores_0, y = transpose_0)[name = string("attention_19_masked_scaled_scores_0")]; + int32 softmax_38_axis_0 = const()[name = string("softmax_38_axis_0"), val = int32(-2)]; + tensor softmax_38 = softmax(axis = softmax_38_axis_0, x = attention_19_masked_scaled_scores_0)[name = string("softmax_38")]; + bool attention_19_attention_0_transpose_x_0 = const()[name = string("attention_19_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_19_attention_0_transpose_y_0 = const()[name = string("attention_19_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_19_attention_0 = matmul(transpose_x = attention_19_attention_0_transpose_x_0, transpose_y = attention_19_attention_0_transpose_y_0, x = softmax_38, y = attention_19_slice_value_cache_heads_0)[name = string("attention_19_attention_0")]; + bool attention_19_scores_1_transpose_y_0 = const()[name = string("attention_19_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_19_scores_1_transpose_x_0 = const()[name = string("attention_19_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_19_scores_1 = matmul(transpose_x = attention_19_scores_1_transpose_x_0, transpose_y = attention_19_scores_1_transpose_y_0, x = attention_19_key_cache_head_1, y = attention_19_q_splits_1)[name = string("attention_19_scores_1")]; + fp16 attention_19_scaled_scores_1_y_0 = const()[name = string("attention_19_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_19_scaled_scores_1 = mul(x = attention_19_scores_1, y = attention_19_scaled_scores_1_y_0)[name = string("attention_19_scaled_scores_1")]; + tensor attention_19_masked_scaled_scores_1 = add(x = attention_19_scaled_scores_1, y = transpose_0)[name = string("attention_19_masked_scaled_scores_1")]; + int32 softmax_39_axis_0 = const()[name = string("softmax_39_axis_0"), val = int32(-2)]; + tensor softmax_39 = softmax(axis = softmax_39_axis_0, x = attention_19_masked_scaled_scores_1)[name = string("softmax_39")]; + bool attention_19_attention_1_transpose_x_0 = const()[name = string("attention_19_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_19_attention_1_transpose_y_0 = const()[name = string("attention_19_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_19_attention_1 = matmul(transpose_x = attention_19_attention_1_transpose_x_0, transpose_y = attention_19_attention_1_transpose_y_0, x = softmax_39, y = attention_19_slice_value_cache_heads_1)[name = string("attention_19_attention_1")]; + int32 attention_19_concat_attention_all_heads_axis_0 = const()[name = string("attention_19_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_19_concat_attention_all_heads_interleave_0 = const()[name = string("attention_19_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_19_concat_attention_all_heads = concat(axis = attention_19_concat_attention_all_heads_axis_0, interleave = attention_19_concat_attention_all_heads_interleave_0, values = (attention_19_attention_0, attention_19_attention_1))[name = string("attention_19_concat_attention_all_heads")]; + tensor attention_19_channels_first_retransposed_perm_0 = const()[name = string("attention_19_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_19_reshaped_shape_0 = const()[name = string("attention_19_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_19_channels_first_retransposed = transpose(perm = attention_19_channels_first_retransposed_perm_0, x = attention_19_concat_attention_all_heads)[name = string("transpose_9")]; + tensor attention_19_reshaped = reshape(shape = attention_19_reshaped_shape_0, x = attention_19_channels_first_retransposed)[name = string("attention_19_reshaped")]; + tensor attention_19_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512727424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513329600))))[name = string("attention_19_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_91 = constexpr_blockwise_shift_scale(data = attention_19_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513358336))))[name = string("constexpr_blockwise_shift_scale_91")]; + tensor attention_19_outproj_strides_0 = const()[name = string("attention_19_outproj_strides_0"), val = tensor([1])]; + string attention_19_outproj_pad_type_0 = const()[name = string("attention_19_outproj_pad_type_0"), val = string("valid")]; + tensor attention_19_outproj_pad_0 = const()[name = string("attention_19_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_19_outproj_dilations_0 = const()[name = string("attention_19_outproj_dilations_0"), val = tensor([1])]; + int32 attention_19_outproj_groups_0 = const()[name = string("attention_19_outproj_groups_0"), val = int32(1)]; + tensor attention_19_outproj = conv(dilations = attention_19_outproj_dilations_0, groups = attention_19_outproj_groups_0, pad = attention_19_outproj_pad_0, pad_type = attention_19_outproj_pad_type_0, strides = attention_19_outproj_strides_0, weight = constexpr_blockwise_shift_scale_91, x = attention_19_reshaped)[name = string("attention_19_outproj")]; + tensor block_19_residual_1 = add(x = block_18_residual_2, y = attention_19_outproj)[name = string("block_19_residual_1")]; + tensor block_19_ffn_rmsnorm_abs = abs(x = block_19_residual_1)[name = string("block_19_ffn_rmsnorm_abs")]; + tensor block_19_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_19_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_19_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_19_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_19_ffn_rmsnorm_maxval = reduce_max(axes = block_19_ffn_rmsnorm_maxval_axes_0, keep_dims = block_19_ffn_rmsnorm_maxval_keep_dims_0, x = block_19_ffn_rmsnorm_abs)[name = string("block_19_ffn_rmsnorm_maxval")]; + fp16 block_19_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_19_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_19_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_19_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_19_ffn_rmsnorm_maxval_clipped = clip(alpha = block_19_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_19_ffn_rmsnorm_maxval_clipped_beta_0, x = block_19_ffn_rmsnorm_maxval)[name = string("block_19_ffn_rmsnorm_maxval_clipped")]; + tensor block_19_ffn_rmsnorm_scaled = real_div(x = block_19_residual_1, y = block_19_ffn_rmsnorm_maxval_clipped)[name = string("block_19_ffn_rmsnorm_scaled")]; + tensor block_19_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_19_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_19_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_19_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_19_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_19_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_19_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_19_ffn_rmsnorm_scaled)[name = string("block_19_ffn_rmsnorm_squared_sum")]; + fp16 block_19_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_19_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_19_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_19_ffn_rmsnorm_rsqrt_epsilon_0, x = block_19_ffn_rmsnorm_squared_sum)[name = string("block_19_ffn_rmsnorm_rsqrt")]; + fp16 block_19_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_19_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_19_ffn_rmsnorm_dim_scaled = mul(x = block_19_ffn_rmsnorm_scaled, y = block_19_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_19_ffn_rmsnorm_dim_scaled")]; + tensor block_19_ffn_rmsnorm_normalized = mul(x = block_19_ffn_rmsnorm_dim_scaled, y = block_19_ffn_rmsnorm_rsqrt)[name = string("block_19_ffn_rmsnorm_normalized")]; + tensor block_19_ffn_rmsnorm_y_0 = const()[name = string("block_19_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513360192)))]; + tensor block_19_ffn_rmsnorm = mul(x = block_19_ffn_rmsnorm_normalized, y = block_19_ffn_rmsnorm_y_0)[name = string("block_19_ffn_rmsnorm")]; + tensor block_19_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513362048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516630720))))[name = string("block_19_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_92 = constexpr_blockwise_shift_scale(data = block_19_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516786432))))[name = string("constexpr_blockwise_shift_scale_92")]; + tensor block_19_ffn_inproj_strides_0 = const()[name = string("block_19_ffn_inproj_strides_0"), val = tensor([1])]; + string block_19_ffn_inproj_pad_type_0 = const()[name = string("block_19_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_19_ffn_inproj_pad_0 = const()[name = string("block_19_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_19_ffn_inproj_dilations_0 = const()[name = string("block_19_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_19_ffn_inproj_groups_0 = const()[name = string("block_19_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_19_ffn_inproj = conv(dilations = block_19_ffn_inproj_dilations_0, groups = block_19_ffn_inproj_groups_0, pad = block_19_ffn_inproj_pad_0, pad_type = block_19_ffn_inproj_pad_type_0, strides = block_19_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_92, x = block_19_ffn_rmsnorm)[name = string("block_19_ffn_inproj")]; + tensor block_19_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516796224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(520064896))))[name = string("block_19_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_93 = constexpr_blockwise_shift_scale(data = block_19_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(520220608))))[name = string("constexpr_blockwise_shift_scale_93")]; + tensor block_19_ffn_g_strides_0 = const()[name = string("block_19_ffn_g_strides_0"), val = tensor([1])]; + string block_19_ffn_g_pad_type_0 = const()[name = string("block_19_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_19_ffn_g_pad_0 = const()[name = string("block_19_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_19_ffn_g_dilations_0 = const()[name = string("block_19_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_19_ffn_g_groups_0 = const()[name = string("block_19_ffn_g_groups_0"), val = int32(1)]; + tensor block_19_ffn_g = conv(dilations = block_19_ffn_g_dilations_0, groups = block_19_ffn_g_groups_0, pad = block_19_ffn_g_pad_0, pad_type = block_19_ffn_g_pad_type_0, strides = block_19_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_93, x = block_19_ffn_rmsnorm)[name = string("block_19_ffn_g")]; + tensor block_19_ffn_g_activation = silu(x = block_19_ffn_g)[name = string("block_19_ffn_g_activation")]; + tensor block_19_ffn_x_gated = mul(x = block_19_ffn_inproj, y = block_19_ffn_g_activation)[name = string("block_19_ffn_x_gated")]; + tensor block_19_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(520230400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523499072))))[name = string("block_19_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_94 = constexpr_blockwise_shift_scale(data = block_19_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523527808))))[name = string("constexpr_blockwise_shift_scale_94")]; + tensor block_19_ffn_outproj_strides_0 = const()[name = string("block_19_ffn_outproj_strides_0"), val = tensor([1])]; + string block_19_ffn_outproj_pad_type_0 = const()[name = string("block_19_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_19_ffn_outproj_pad_0 = const()[name = string("block_19_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_19_ffn_outproj_dilations_0 = const()[name = string("block_19_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_19_ffn_outproj_groups_0 = const()[name = string("block_19_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_19_ffn_outproj = conv(dilations = block_19_ffn_outproj_dilations_0, groups = block_19_ffn_outproj_groups_0, pad = block_19_ffn_outproj_pad_0, pad_type = block_19_ffn_outproj_pad_type_0, strides = block_19_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_94, x = block_19_ffn_x_gated)[name = string("block_19_ffn_outproj")]; + tensor block_19_residual_2 = add(x = block_19_ffn_outproj, y = block_19_residual_1)[name = string("block_19_residual_2")]; + tensor block_20_attention_rmsnorm_abs = abs(x = block_19_residual_2)[name = string("block_20_attention_rmsnorm_abs")]; + tensor block_20_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_20_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_20_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_20_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_20_attention_rmsnorm_maxval = reduce_max(axes = block_20_attention_rmsnorm_maxval_axes_0, keep_dims = block_20_attention_rmsnorm_maxval_keep_dims_0, x = block_20_attention_rmsnorm_abs)[name = string("block_20_attention_rmsnorm_maxval")]; + fp16 block_20_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_20_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_20_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_20_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_20_attention_rmsnorm_maxval_clipped = clip(alpha = block_20_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_20_attention_rmsnorm_maxval_clipped_beta_0, x = block_20_attention_rmsnorm_maxval)[name = string("block_20_attention_rmsnorm_maxval_clipped")]; + tensor block_20_attention_rmsnorm_scaled = real_div(x = block_19_residual_2, y = block_20_attention_rmsnorm_maxval_clipped)[name = string("block_20_attention_rmsnorm_scaled")]; + tensor block_20_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_20_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_20_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_20_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_20_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_20_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_20_attention_rmsnorm_squared_sum_keep_dims_0, x = block_20_attention_rmsnorm_scaled)[name = string("block_20_attention_rmsnorm_squared_sum")]; + fp16 block_20_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_20_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_20_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_20_attention_rmsnorm_rsqrt_epsilon_0, x = block_20_attention_rmsnorm_squared_sum)[name = string("block_20_attention_rmsnorm_rsqrt")]; + fp16 block_20_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_20_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_20_attention_rmsnorm_dim_scaled = mul(x = block_20_attention_rmsnorm_scaled, y = block_20_attention_rmsnorm_dim_scaled_y_0)[name = string("block_20_attention_rmsnorm_dim_scaled")]; + tensor block_20_attention_rmsnorm_normalized = mul(x = block_20_attention_rmsnorm_dim_scaled, y = block_20_attention_rmsnorm_rsqrt)[name = string("block_20_attention_rmsnorm_normalized")]; + tensor block_20_attention_rmsnorm_y_0 = const()[name = string("block_20_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523529664)))]; + tensor block_20_attention_rmsnorm = mul(x = block_20_attention_rmsnorm_normalized, y = block_20_attention_rmsnorm_y_0)[name = string("block_20_attention_rmsnorm")]; + tensor attention_20_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523531520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524305728))))[name = string("attention_20_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_95 = constexpr_blockwise_shift_scale(data = attention_20_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524342656))))[name = string("constexpr_blockwise_shift_scale_95")]; + tensor attention_20_qkvproj_bias_0 = const()[name = string("attention_20_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524345024)))]; + tensor attention_20_qkvproj_strides_0 = const()[name = string("attention_20_qkvproj_strides_0"), val = tensor([1])]; + string attention_20_qkvproj_pad_type_0 = const()[name = string("attention_20_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_20_qkvproj_pad_0 = const()[name = string("attention_20_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_20_qkvproj_dilations_0 = const()[name = string("attention_20_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_20_qkvproj_groups_0 = const()[name = string("attention_20_qkvproj_groups_0"), val = int32(1)]; + tensor attention_20_qkvproj = conv(bias = attention_20_qkvproj_bias_0, dilations = attention_20_qkvproj_dilations_0, groups = attention_20_qkvproj_groups_0, pad = attention_20_qkvproj_pad_0, pad_type = attention_20_qkvproj_pad_type_0, strides = attention_20_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_95, x = block_20_attention_rmsnorm)[name = string("attention_20_qkvproj")]; + tensor attention_20_head_reshape_shape_0 = const()[name = string("attention_20_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_20_head_reshape = reshape(shape = attention_20_head_reshape_shape_0, x = attention_20_qkvproj)[name = string("attention_20_head_reshape")]; + tensor attention_20_head_transpose_perm_0 = const()[name = string("attention_20_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_20_split_qkv_heads_axis_0 = const()[name = string("attention_20_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_20_split_qkv_heads_split_sizes_0 = const()[name = string("attention_20_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_20_head_transpose = transpose(perm = attention_20_head_transpose_perm_0, x = attention_20_head_reshape)[name = string("transpose_8")]; + tensor attention_20_split_qkv_heads_0, tensor attention_20_split_qkv_heads_1, tensor attention_20_split_qkv_heads_2 = split(axis = attention_20_split_qkv_heads_axis_0, split_sizes = attention_20_split_qkv_heads_split_sizes_0, x = attention_20_head_transpose)[name = string("attention_20_split_qkv_heads")]; + tensor attention_20_q_rope_lhs_mult = mul(x = attention_20_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_20_q_rope_lhs_mult")]; + int32 attention_20_q_rotate_half_split_num_splits_0 = const()[name = string("attention_20_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_20_q_rotate_half_split_axis_0 = const()[name = string("attention_20_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_20_q_rotate_half_split_0, tensor attention_20_q_rotate_half_split_1 = split(axis = attention_20_q_rotate_half_split_axis_0, num_splits = attention_20_q_rotate_half_split_num_splits_0, x = attention_20_split_qkv_heads_0)[name = string("attention_20_q_rotate_half_split")]; + fp16 attention_20_q_rotate_half_neg_y_0 = const()[name = string("attention_20_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_20_q_rotate_half_neg = mul(x = attention_20_q_rotate_half_split_1, y = attention_20_q_rotate_half_neg_y_0)[name = string("attention_20_q_rotate_half_neg")]; + int32 attention_20_q_rotate_half_concat_axis_0 = const()[name = string("attention_20_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_20_q_rotate_half_concat_interleave_0 = const()[name = string("attention_20_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_20_q_rotate_half_concat = concat(axis = attention_20_q_rotate_half_concat_axis_0, interleave = attention_20_q_rotate_half_concat_interleave_0, values = (attention_20_q_rotate_half_neg, attention_20_q_rotate_half_split_0))[name = string("attention_20_q_rotate_half_concat")]; + tensor attention_20_q_rope_rhs_mult = mul(x = attention_20_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_20_q_rope_rhs_mult")]; + tensor attention_20_q_rope = add(x = attention_20_q_rope_lhs_mult, y = attention_20_q_rope_rhs_mult)[name = string("attention_20_q_rope")]; + tensor attention_20_k_rope_lhs_mult = mul(x = attention_20_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_20_k_rope_lhs_mult")]; + int32 attention_20_k_rotate_half_split_num_splits_0 = const()[name = string("attention_20_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_20_k_rotate_half_split_axis_0 = const()[name = string("attention_20_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_20_k_rotate_half_split_0, tensor attention_20_k_rotate_half_split_1 = split(axis = attention_20_k_rotate_half_split_axis_0, num_splits = attention_20_k_rotate_half_split_num_splits_0, x = attention_20_split_qkv_heads_1)[name = string("attention_20_k_rotate_half_split")]; + fp16 attention_20_k_rotate_half_neg_y_0 = const()[name = string("attention_20_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_20_k_rotate_half_neg = mul(x = attention_20_k_rotate_half_split_1, y = attention_20_k_rotate_half_neg_y_0)[name = string("attention_20_k_rotate_half_neg")]; + int32 attention_20_k_rotate_half_concat_axis_0 = const()[name = string("attention_20_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_20_k_rotate_half_concat_interleave_0 = const()[name = string("attention_20_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_20_k_rotate_half_concat = concat(axis = attention_20_k_rotate_half_concat_axis_0, interleave = attention_20_k_rotate_half_concat_interleave_0, values = (attention_20_k_rotate_half_neg, attention_20_k_rotate_half_split_0))[name = string("attention_20_k_rotate_half_concat")]; + tensor attention_20_k_rope_rhs_mult = mul(x = attention_20_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_20_k_rope_rhs_mult")]; + tensor attention_20_k_rope = add(x = attention_20_k_rope_lhs_mult, y = attention_20_k_rope_rhs_mult)[name = string("attention_20_k_rope")]; + int32 attention_20_q_splits_axis_0 = const()[name = string("attention_20_q_splits_axis_0"), val = int32(1)]; + int32 attention_20_q_splits_num_splits_0 = const()[name = string("attention_20_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_20_q_splits_0, tensor attention_20_q_splits_1 = split(axis = attention_20_q_splits_axis_0, num_splits = attention_20_q_splits_num_splits_0, x = attention_20_q_rope)[name = string("attention_20_q_splits")]; + tensor attention_20_update_begin_0_values0_0 = const()[name = string("attention_20_update_begin_0_values0_0"), val = tensor([20])]; + tensor attention_20_update_begin_0_values1_0 = const()[name = string("attention_20_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_20_update_begin_0_values3_0 = const()[name = string("attention_20_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_20_update_begin_0_axis_0 = const()[name = string("attention_20_update_begin_0_axis_0"), val = int32(0)]; + bool attention_20_update_begin_0_interleave_0 = const()[name = string("attention_20_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_20_update_begin_0 = concat(axis = attention_20_update_begin_0_axis_0, interleave = attention_20_update_begin_0_interleave_0, values = (attention_20_update_begin_0_values0_0, attention_20_update_begin_0_values1_0, query_pos1, attention_20_update_begin_0_values3_0))[name = string("attention_20_update_begin_0")]; + tensor attention_20_update_end_0_values0_0 = const()[name = string("attention_20_update_end_0_values0_0"), val = tensor([21])]; + tensor attention_20_update_end_0_values1_0 = const()[name = string("attention_20_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_20_update_end_0_values3_0 = const()[name = string("attention_20_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_20_update_end_0_axis_0 = const()[name = string("attention_20_update_end_0_axis_0"), val = int32(0)]; + bool attention_20_update_end_0_interleave_0 = const()[name = string("attention_20_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_20_update_end_0 = concat(axis = attention_20_update_end_0_axis_0, interleave = attention_20_update_end_0_interleave_0, values = (attention_20_update_end_0_values0_0, attention_20_update_end_0_values1_0, end_pos_0, attention_20_update_end_0_values3_0))[name = string("attention_20_update_end_0")]; + tensor attention_20_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_20_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_20_updated_key_cache_0 = slice_update(begin = attention_20_update_begin_0, end = attention_20_update_end_0, squeeze_mask = attention_20_updated_key_cache_0_squeeze_mask_0, update = attention_20_k_rope, x = coreml_update_state_38)[name = string("attention_20_updated_key_cache_0")]; + write_state(data = attention_20_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_88_write_state")]; + tensor coreml_update_state_40 = read_state(input = key_cache_state)[name = string("coreml_update_state_88")]; + tensor attention_20_key_cache_begin_0 = const()[name = string("attention_20_key_cache_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor attention_20_key_cache_end_0 = const()[name = string("attention_20_key_cache_end_0"), val = tensor([21, 2, 512, 64])]; + tensor attention_20_key_cache_squeeze_mask_0 = const()[name = string("attention_20_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_20_key_cache = slice_by_index(begin = attention_20_key_cache_begin_0, end = attention_20_key_cache_end_0, squeeze_mask = attention_20_key_cache_squeeze_mask_0, x = coreml_update_state_40)[name = string("attention_20_key_cache")]; + int32 attention_20_key_cache_head_axis_0 = const()[name = string("attention_20_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_20_key_cache_head_num_splits_0 = const()[name = string("attention_20_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_20_key_cache_head_0, tensor attention_20_key_cache_head_1 = split(axis = attention_20_key_cache_head_axis_0, num_splits = attention_20_key_cache_head_num_splits_0, x = attention_20_key_cache)[name = string("attention_20_key_cache_head")]; + tensor attention_20_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_20_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_20_updated_value_cache_0 = slice_update(begin = attention_20_update_begin_0, end = attention_20_update_end_0, squeeze_mask = attention_20_updated_value_cache_0_squeeze_mask_0, update = attention_20_split_qkv_heads_2, x = coreml_update_state_39)[name = string("attention_20_updated_value_cache_0")]; + write_state(data = attention_20_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_89_write_state")]; + tensor coreml_update_state_41 = read_state(input = value_cache_state)[name = string("coreml_update_state_89")]; + tensor attention_20_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_20_slice_current_layer_value_cache_begin_0"), val = tensor([20, 0, 0, 0])]; + tensor attention_20_slice_current_layer_value_cache_end_0 = const()[name = string("attention_20_slice_current_layer_value_cache_end_0"), val = tensor([21, 2, 512, 64])]; + tensor attention_20_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_20_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_20_slice_current_layer_value_cache = slice_by_index(begin = attention_20_slice_current_layer_value_cache_begin_0, end = attention_20_slice_current_layer_value_cache_end_0, squeeze_mask = attention_20_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_41)[name = string("attention_20_slice_current_layer_value_cache")]; + int32 attention_20_slice_value_cache_heads_axis_0 = const()[name = string("attention_20_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_20_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_20_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_20_slice_value_cache_heads_0, tensor attention_20_slice_value_cache_heads_1 = split(axis = attention_20_slice_value_cache_heads_axis_0, num_splits = attention_20_slice_value_cache_heads_num_splits_0, x = attention_20_slice_current_layer_value_cache)[name = string("attention_20_slice_value_cache_heads")]; + bool attention_20_scores_0_transpose_y_0 = const()[name = string("attention_20_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_20_scores_0_transpose_x_0 = const()[name = string("attention_20_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_20_scores_0 = matmul(transpose_x = attention_20_scores_0_transpose_x_0, transpose_y = attention_20_scores_0_transpose_y_0, x = attention_20_key_cache_head_0, y = attention_20_q_splits_0)[name = string("attention_20_scores_0")]; + fp16 attention_20_scaled_scores_0_y_0 = const()[name = string("attention_20_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_20_scaled_scores_0 = mul(x = attention_20_scores_0, y = attention_20_scaled_scores_0_y_0)[name = string("attention_20_scaled_scores_0")]; + tensor attention_20_masked_scaled_scores_0 = add(x = attention_20_scaled_scores_0, y = transpose_0)[name = string("attention_20_masked_scaled_scores_0")]; + int32 softmax_40_axis_0 = const()[name = string("softmax_40_axis_0"), val = int32(-2)]; + tensor softmax_40 = softmax(axis = softmax_40_axis_0, x = attention_20_masked_scaled_scores_0)[name = string("softmax_40")]; + bool attention_20_attention_0_transpose_x_0 = const()[name = string("attention_20_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_20_attention_0_transpose_y_0 = const()[name = string("attention_20_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_20_attention_0 = matmul(transpose_x = attention_20_attention_0_transpose_x_0, transpose_y = attention_20_attention_0_transpose_y_0, x = softmax_40, y = attention_20_slice_value_cache_heads_0)[name = string("attention_20_attention_0")]; + bool attention_20_scores_1_transpose_y_0 = const()[name = string("attention_20_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_20_scores_1_transpose_x_0 = const()[name = string("attention_20_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_20_scores_1 = matmul(transpose_x = attention_20_scores_1_transpose_x_0, transpose_y = attention_20_scores_1_transpose_y_0, x = attention_20_key_cache_head_1, y = attention_20_q_splits_1)[name = string("attention_20_scores_1")]; + fp16 attention_20_scaled_scores_1_y_0 = const()[name = string("attention_20_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_20_scaled_scores_1 = mul(x = attention_20_scores_1, y = attention_20_scaled_scores_1_y_0)[name = string("attention_20_scaled_scores_1")]; + tensor attention_20_masked_scaled_scores_1 = add(x = attention_20_scaled_scores_1, y = transpose_0)[name = string("attention_20_masked_scaled_scores_1")]; + int32 softmax_41_axis_0 = const()[name = string("softmax_41_axis_0"), val = int32(-2)]; + tensor softmax_41 = softmax(axis = softmax_41_axis_0, x = attention_20_masked_scaled_scores_1)[name = string("softmax_41")]; + bool attention_20_attention_1_transpose_x_0 = const()[name = string("attention_20_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_20_attention_1_transpose_y_0 = const()[name = string("attention_20_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_20_attention_1 = matmul(transpose_x = attention_20_attention_1_transpose_x_0, transpose_y = attention_20_attention_1_transpose_y_0, x = softmax_41, y = attention_20_slice_value_cache_heads_1)[name = string("attention_20_attention_1")]; + int32 attention_20_concat_attention_all_heads_axis_0 = const()[name = string("attention_20_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_20_concat_attention_all_heads_interleave_0 = const()[name = string("attention_20_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_20_concat_attention_all_heads = concat(axis = attention_20_concat_attention_all_heads_axis_0, interleave = attention_20_concat_attention_all_heads_interleave_0, values = (attention_20_attention_0, attention_20_attention_1))[name = string("attention_20_concat_attention_all_heads")]; + tensor attention_20_channels_first_retransposed_perm_0 = const()[name = string("attention_20_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_20_reshaped_shape_0 = const()[name = string("attention_20_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_20_channels_first_retransposed = transpose(perm = attention_20_channels_first_retransposed_perm_0, x = attention_20_concat_attention_all_heads)[name = string("transpose_7")]; + tensor attention_20_reshaped = reshape(shape = attention_20_reshaped_shape_0, x = attention_20_channels_first_retransposed)[name = string("attention_20_reshaped")]; + tensor attention_20_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524347392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524949568))))[name = string("attention_20_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_96 = constexpr_blockwise_shift_scale(data = attention_20_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524978304))))[name = string("constexpr_blockwise_shift_scale_96")]; + tensor attention_20_outproj_strides_0 = const()[name = string("attention_20_outproj_strides_0"), val = tensor([1])]; + string attention_20_outproj_pad_type_0 = const()[name = string("attention_20_outproj_pad_type_0"), val = string("valid")]; + tensor attention_20_outproj_pad_0 = const()[name = string("attention_20_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_20_outproj_dilations_0 = const()[name = string("attention_20_outproj_dilations_0"), val = tensor([1])]; + int32 attention_20_outproj_groups_0 = const()[name = string("attention_20_outproj_groups_0"), val = int32(1)]; + tensor attention_20_outproj = conv(dilations = attention_20_outproj_dilations_0, groups = attention_20_outproj_groups_0, pad = attention_20_outproj_pad_0, pad_type = attention_20_outproj_pad_type_0, strides = attention_20_outproj_strides_0, weight = constexpr_blockwise_shift_scale_96, x = attention_20_reshaped)[name = string("attention_20_outproj")]; + tensor block_20_residual_1 = add(x = block_19_residual_2, y = attention_20_outproj)[name = string("block_20_residual_1")]; + tensor block_20_ffn_rmsnorm_abs = abs(x = block_20_residual_1)[name = string("block_20_ffn_rmsnorm_abs")]; + tensor block_20_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_20_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_20_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_20_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_20_ffn_rmsnorm_maxval = reduce_max(axes = block_20_ffn_rmsnorm_maxval_axes_0, keep_dims = block_20_ffn_rmsnorm_maxval_keep_dims_0, x = block_20_ffn_rmsnorm_abs)[name = string("block_20_ffn_rmsnorm_maxval")]; + fp16 block_20_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_20_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_20_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_20_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_20_ffn_rmsnorm_maxval_clipped = clip(alpha = block_20_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_20_ffn_rmsnorm_maxval_clipped_beta_0, x = block_20_ffn_rmsnorm_maxval)[name = string("block_20_ffn_rmsnorm_maxval_clipped")]; + tensor block_20_ffn_rmsnorm_scaled = real_div(x = block_20_residual_1, y = block_20_ffn_rmsnorm_maxval_clipped)[name = string("block_20_ffn_rmsnorm_scaled")]; + tensor block_20_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_20_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_20_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_20_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_20_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_20_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_20_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_20_ffn_rmsnorm_scaled)[name = string("block_20_ffn_rmsnorm_squared_sum")]; + fp16 block_20_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_20_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_20_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_20_ffn_rmsnorm_rsqrt_epsilon_0, x = block_20_ffn_rmsnorm_squared_sum)[name = string("block_20_ffn_rmsnorm_rsqrt")]; + fp16 block_20_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_20_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_20_ffn_rmsnorm_dim_scaled = mul(x = block_20_ffn_rmsnorm_scaled, y = block_20_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_20_ffn_rmsnorm_dim_scaled")]; + tensor block_20_ffn_rmsnorm_normalized = mul(x = block_20_ffn_rmsnorm_dim_scaled, y = block_20_ffn_rmsnorm_rsqrt)[name = string("block_20_ffn_rmsnorm_normalized")]; + tensor block_20_ffn_rmsnorm_y_0 = const()[name = string("block_20_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524980160)))]; + tensor block_20_ffn_rmsnorm = mul(x = block_20_ffn_rmsnorm_normalized, y = block_20_ffn_rmsnorm_y_0)[name = string("block_20_ffn_rmsnorm")]; + tensor block_20_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524982016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528250688))))[name = string("block_20_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_97 = constexpr_blockwise_shift_scale(data = block_20_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528406400))))[name = string("constexpr_blockwise_shift_scale_97")]; + tensor block_20_ffn_inproj_strides_0 = const()[name = string("block_20_ffn_inproj_strides_0"), val = tensor([1])]; + string block_20_ffn_inproj_pad_type_0 = const()[name = string("block_20_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_20_ffn_inproj_pad_0 = const()[name = string("block_20_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_20_ffn_inproj_dilations_0 = const()[name = string("block_20_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_20_ffn_inproj_groups_0 = const()[name = string("block_20_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_20_ffn_inproj = conv(dilations = block_20_ffn_inproj_dilations_0, groups = block_20_ffn_inproj_groups_0, pad = block_20_ffn_inproj_pad_0, pad_type = block_20_ffn_inproj_pad_type_0, strides = block_20_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_97, x = block_20_ffn_rmsnorm)[name = string("block_20_ffn_inproj")]; + tensor block_20_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528416192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531684864))))[name = string("block_20_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_98 = constexpr_blockwise_shift_scale(data = block_20_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531840576))))[name = string("constexpr_blockwise_shift_scale_98")]; + tensor block_20_ffn_g_strides_0 = const()[name = string("block_20_ffn_g_strides_0"), val = tensor([1])]; + string block_20_ffn_g_pad_type_0 = const()[name = string("block_20_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_20_ffn_g_pad_0 = const()[name = string("block_20_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_20_ffn_g_dilations_0 = const()[name = string("block_20_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_20_ffn_g_groups_0 = const()[name = string("block_20_ffn_g_groups_0"), val = int32(1)]; + tensor block_20_ffn_g = conv(dilations = block_20_ffn_g_dilations_0, groups = block_20_ffn_g_groups_0, pad = block_20_ffn_g_pad_0, pad_type = block_20_ffn_g_pad_type_0, strides = block_20_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_98, x = block_20_ffn_rmsnorm)[name = string("block_20_ffn_g")]; + tensor block_20_ffn_g_activation = silu(x = block_20_ffn_g)[name = string("block_20_ffn_g_activation")]; + tensor block_20_ffn_x_gated = mul(x = block_20_ffn_inproj, y = block_20_ffn_g_activation)[name = string("block_20_ffn_x_gated")]; + tensor block_20_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531850368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535119040))))[name = string("block_20_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_99 = constexpr_blockwise_shift_scale(data = block_20_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535147776))))[name = string("constexpr_blockwise_shift_scale_99")]; + tensor block_20_ffn_outproj_strides_0 = const()[name = string("block_20_ffn_outproj_strides_0"), val = tensor([1])]; + string block_20_ffn_outproj_pad_type_0 = const()[name = string("block_20_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_20_ffn_outproj_pad_0 = const()[name = string("block_20_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_20_ffn_outproj_dilations_0 = const()[name = string("block_20_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_20_ffn_outproj_groups_0 = const()[name = string("block_20_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_20_ffn_outproj = conv(dilations = block_20_ffn_outproj_dilations_0, groups = block_20_ffn_outproj_groups_0, pad = block_20_ffn_outproj_pad_0, pad_type = block_20_ffn_outproj_pad_type_0, strides = block_20_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_99, x = block_20_ffn_x_gated)[name = string("block_20_ffn_outproj")]; + tensor block_20_residual_2 = add(x = block_20_ffn_outproj, y = block_20_residual_1)[name = string("block_20_residual_2")]; + tensor block_21_attention_rmsnorm_abs = abs(x = block_20_residual_2)[name = string("block_21_attention_rmsnorm_abs")]; + tensor block_21_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_21_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_21_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_21_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_21_attention_rmsnorm_maxval = reduce_max(axes = block_21_attention_rmsnorm_maxval_axes_0, keep_dims = block_21_attention_rmsnorm_maxval_keep_dims_0, x = block_21_attention_rmsnorm_abs)[name = string("block_21_attention_rmsnorm_maxval")]; + fp16 block_21_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_21_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_21_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_21_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_21_attention_rmsnorm_maxval_clipped = clip(alpha = block_21_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_21_attention_rmsnorm_maxval_clipped_beta_0, x = block_21_attention_rmsnorm_maxval)[name = string("block_21_attention_rmsnorm_maxval_clipped")]; + tensor block_21_attention_rmsnorm_scaled = real_div(x = block_20_residual_2, y = block_21_attention_rmsnorm_maxval_clipped)[name = string("block_21_attention_rmsnorm_scaled")]; + tensor block_21_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_21_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_21_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_21_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_21_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_21_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_21_attention_rmsnorm_squared_sum_keep_dims_0, x = block_21_attention_rmsnorm_scaled)[name = string("block_21_attention_rmsnorm_squared_sum")]; + fp16 block_21_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_21_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_21_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_21_attention_rmsnorm_rsqrt_epsilon_0, x = block_21_attention_rmsnorm_squared_sum)[name = string("block_21_attention_rmsnorm_rsqrt")]; + fp16 block_21_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_21_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_21_attention_rmsnorm_dim_scaled = mul(x = block_21_attention_rmsnorm_scaled, y = block_21_attention_rmsnorm_dim_scaled_y_0)[name = string("block_21_attention_rmsnorm_dim_scaled")]; + tensor block_21_attention_rmsnorm_normalized = mul(x = block_21_attention_rmsnorm_dim_scaled, y = block_21_attention_rmsnorm_rsqrt)[name = string("block_21_attention_rmsnorm_normalized")]; + tensor block_21_attention_rmsnorm_y_0 = const()[name = string("block_21_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535149632)))]; + tensor block_21_attention_rmsnorm = mul(x = block_21_attention_rmsnorm_normalized, y = block_21_attention_rmsnorm_y_0)[name = string("block_21_attention_rmsnorm")]; + tensor attention_21_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535151488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535925696))))[name = string("attention_21_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_100 = constexpr_blockwise_shift_scale(data = attention_21_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535962624))))[name = string("constexpr_blockwise_shift_scale_100")]; + tensor attention_21_qkvproj_bias_0 = const()[name = string("attention_21_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535964992)))]; + tensor attention_21_qkvproj_strides_0 = const()[name = string("attention_21_qkvproj_strides_0"), val = tensor([1])]; + string attention_21_qkvproj_pad_type_0 = const()[name = string("attention_21_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_21_qkvproj_pad_0 = const()[name = string("attention_21_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_21_qkvproj_dilations_0 = const()[name = string("attention_21_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_21_qkvproj_groups_0 = const()[name = string("attention_21_qkvproj_groups_0"), val = int32(1)]; + tensor attention_21_qkvproj = conv(bias = attention_21_qkvproj_bias_0, dilations = attention_21_qkvproj_dilations_0, groups = attention_21_qkvproj_groups_0, pad = attention_21_qkvproj_pad_0, pad_type = attention_21_qkvproj_pad_type_0, strides = attention_21_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_100, x = block_21_attention_rmsnorm)[name = string("attention_21_qkvproj")]; + tensor attention_21_head_reshape_shape_0 = const()[name = string("attention_21_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_21_head_reshape = reshape(shape = attention_21_head_reshape_shape_0, x = attention_21_qkvproj)[name = string("attention_21_head_reshape")]; + tensor attention_21_head_transpose_perm_0 = const()[name = string("attention_21_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_21_split_qkv_heads_axis_0 = const()[name = string("attention_21_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_21_split_qkv_heads_split_sizes_0 = const()[name = string("attention_21_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_21_head_transpose = transpose(perm = attention_21_head_transpose_perm_0, x = attention_21_head_reshape)[name = string("transpose_6")]; + tensor attention_21_split_qkv_heads_0, tensor attention_21_split_qkv_heads_1, tensor attention_21_split_qkv_heads_2 = split(axis = attention_21_split_qkv_heads_axis_0, split_sizes = attention_21_split_qkv_heads_split_sizes_0, x = attention_21_head_transpose)[name = string("attention_21_split_qkv_heads")]; + tensor attention_21_q_rope_lhs_mult = mul(x = attention_21_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_21_q_rope_lhs_mult")]; + int32 attention_21_q_rotate_half_split_num_splits_0 = const()[name = string("attention_21_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_21_q_rotate_half_split_axis_0 = const()[name = string("attention_21_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_21_q_rotate_half_split_0, tensor attention_21_q_rotate_half_split_1 = split(axis = attention_21_q_rotate_half_split_axis_0, num_splits = attention_21_q_rotate_half_split_num_splits_0, x = attention_21_split_qkv_heads_0)[name = string("attention_21_q_rotate_half_split")]; + fp16 attention_21_q_rotate_half_neg_y_0 = const()[name = string("attention_21_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_21_q_rotate_half_neg = mul(x = attention_21_q_rotate_half_split_1, y = attention_21_q_rotate_half_neg_y_0)[name = string("attention_21_q_rotate_half_neg")]; + int32 attention_21_q_rotate_half_concat_axis_0 = const()[name = string("attention_21_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_21_q_rotate_half_concat_interleave_0 = const()[name = string("attention_21_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_21_q_rotate_half_concat = concat(axis = attention_21_q_rotate_half_concat_axis_0, interleave = attention_21_q_rotate_half_concat_interleave_0, values = (attention_21_q_rotate_half_neg, attention_21_q_rotate_half_split_0))[name = string("attention_21_q_rotate_half_concat")]; + tensor attention_21_q_rope_rhs_mult = mul(x = attention_21_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_21_q_rope_rhs_mult")]; + tensor attention_21_q_rope = add(x = attention_21_q_rope_lhs_mult, y = attention_21_q_rope_rhs_mult)[name = string("attention_21_q_rope")]; + tensor attention_21_k_rope_lhs_mult = mul(x = attention_21_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_21_k_rope_lhs_mult")]; + int32 attention_21_k_rotate_half_split_num_splits_0 = const()[name = string("attention_21_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_21_k_rotate_half_split_axis_0 = const()[name = string("attention_21_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_21_k_rotate_half_split_0, tensor attention_21_k_rotate_half_split_1 = split(axis = attention_21_k_rotate_half_split_axis_0, num_splits = attention_21_k_rotate_half_split_num_splits_0, x = attention_21_split_qkv_heads_1)[name = string("attention_21_k_rotate_half_split")]; + fp16 attention_21_k_rotate_half_neg_y_0 = const()[name = string("attention_21_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_21_k_rotate_half_neg = mul(x = attention_21_k_rotate_half_split_1, y = attention_21_k_rotate_half_neg_y_0)[name = string("attention_21_k_rotate_half_neg")]; + int32 attention_21_k_rotate_half_concat_axis_0 = const()[name = string("attention_21_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_21_k_rotate_half_concat_interleave_0 = const()[name = string("attention_21_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_21_k_rotate_half_concat = concat(axis = attention_21_k_rotate_half_concat_axis_0, interleave = attention_21_k_rotate_half_concat_interleave_0, values = (attention_21_k_rotate_half_neg, attention_21_k_rotate_half_split_0))[name = string("attention_21_k_rotate_half_concat")]; + tensor attention_21_k_rope_rhs_mult = mul(x = attention_21_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_21_k_rope_rhs_mult")]; + tensor attention_21_k_rope = add(x = attention_21_k_rope_lhs_mult, y = attention_21_k_rope_rhs_mult)[name = string("attention_21_k_rope")]; + int32 attention_21_q_splits_axis_0 = const()[name = string("attention_21_q_splits_axis_0"), val = int32(1)]; + int32 attention_21_q_splits_num_splits_0 = const()[name = string("attention_21_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_21_q_splits_0, tensor attention_21_q_splits_1 = split(axis = attention_21_q_splits_axis_0, num_splits = attention_21_q_splits_num_splits_0, x = attention_21_q_rope)[name = string("attention_21_q_splits")]; + tensor attention_21_update_begin_0_values0_0 = const()[name = string("attention_21_update_begin_0_values0_0"), val = tensor([21])]; + tensor attention_21_update_begin_0_values1_0 = const()[name = string("attention_21_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_21_update_begin_0_values3_0 = const()[name = string("attention_21_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_21_update_begin_0_axis_0 = const()[name = string("attention_21_update_begin_0_axis_0"), val = int32(0)]; + bool attention_21_update_begin_0_interleave_0 = const()[name = string("attention_21_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_21_update_begin_0 = concat(axis = attention_21_update_begin_0_axis_0, interleave = attention_21_update_begin_0_interleave_0, values = (attention_21_update_begin_0_values0_0, attention_21_update_begin_0_values1_0, query_pos1, attention_21_update_begin_0_values3_0))[name = string("attention_21_update_begin_0")]; + tensor attention_21_update_end_0_values0_0 = const()[name = string("attention_21_update_end_0_values0_0"), val = tensor([22])]; + tensor attention_21_update_end_0_values1_0 = const()[name = string("attention_21_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_21_update_end_0_values3_0 = const()[name = string("attention_21_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_21_update_end_0_axis_0 = const()[name = string("attention_21_update_end_0_axis_0"), val = int32(0)]; + bool attention_21_update_end_0_interleave_0 = const()[name = string("attention_21_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_21_update_end_0 = concat(axis = attention_21_update_end_0_axis_0, interleave = attention_21_update_end_0_interleave_0, values = (attention_21_update_end_0_values0_0, attention_21_update_end_0_values1_0, end_pos_0, attention_21_update_end_0_values3_0))[name = string("attention_21_update_end_0")]; + tensor attention_21_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_21_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_21_updated_key_cache_0 = slice_update(begin = attention_21_update_begin_0, end = attention_21_update_end_0, squeeze_mask = attention_21_updated_key_cache_0_squeeze_mask_0, update = attention_21_k_rope, x = coreml_update_state_40)[name = string("attention_21_updated_key_cache_0")]; + write_state(data = attention_21_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_90_write_state")]; + tensor coreml_update_state_42 = read_state(input = key_cache_state)[name = string("coreml_update_state_90")]; + tensor attention_21_key_cache_begin_0 = const()[name = string("attention_21_key_cache_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor attention_21_key_cache_end_0 = const()[name = string("attention_21_key_cache_end_0"), val = tensor([22, 2, 512, 64])]; + tensor attention_21_key_cache_squeeze_mask_0 = const()[name = string("attention_21_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_21_key_cache = slice_by_index(begin = attention_21_key_cache_begin_0, end = attention_21_key_cache_end_0, squeeze_mask = attention_21_key_cache_squeeze_mask_0, x = coreml_update_state_42)[name = string("attention_21_key_cache")]; + int32 attention_21_key_cache_head_axis_0 = const()[name = string("attention_21_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_21_key_cache_head_num_splits_0 = const()[name = string("attention_21_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_21_key_cache_head_0, tensor attention_21_key_cache_head_1 = split(axis = attention_21_key_cache_head_axis_0, num_splits = attention_21_key_cache_head_num_splits_0, x = attention_21_key_cache)[name = string("attention_21_key_cache_head")]; + tensor attention_21_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_21_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_21_updated_value_cache_0 = slice_update(begin = attention_21_update_begin_0, end = attention_21_update_end_0, squeeze_mask = attention_21_updated_value_cache_0_squeeze_mask_0, update = attention_21_split_qkv_heads_2, x = coreml_update_state_41)[name = string("attention_21_updated_value_cache_0")]; + write_state(data = attention_21_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_91_write_state")]; + tensor coreml_update_state_43 = read_state(input = value_cache_state)[name = string("coreml_update_state_91")]; + tensor attention_21_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_21_slice_current_layer_value_cache_begin_0"), val = tensor([21, 0, 0, 0])]; + tensor attention_21_slice_current_layer_value_cache_end_0 = const()[name = string("attention_21_slice_current_layer_value_cache_end_0"), val = tensor([22, 2, 512, 64])]; + tensor attention_21_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_21_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_21_slice_current_layer_value_cache = slice_by_index(begin = attention_21_slice_current_layer_value_cache_begin_0, end = attention_21_slice_current_layer_value_cache_end_0, squeeze_mask = attention_21_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_43)[name = string("attention_21_slice_current_layer_value_cache")]; + int32 attention_21_slice_value_cache_heads_axis_0 = const()[name = string("attention_21_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_21_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_21_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_21_slice_value_cache_heads_0, tensor attention_21_slice_value_cache_heads_1 = split(axis = attention_21_slice_value_cache_heads_axis_0, num_splits = attention_21_slice_value_cache_heads_num_splits_0, x = attention_21_slice_current_layer_value_cache)[name = string("attention_21_slice_value_cache_heads")]; + bool attention_21_scores_0_transpose_y_0 = const()[name = string("attention_21_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_21_scores_0_transpose_x_0 = const()[name = string("attention_21_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_21_scores_0 = matmul(transpose_x = attention_21_scores_0_transpose_x_0, transpose_y = attention_21_scores_0_transpose_y_0, x = attention_21_key_cache_head_0, y = attention_21_q_splits_0)[name = string("attention_21_scores_0")]; + fp16 attention_21_scaled_scores_0_y_0 = const()[name = string("attention_21_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_21_scaled_scores_0 = mul(x = attention_21_scores_0, y = attention_21_scaled_scores_0_y_0)[name = string("attention_21_scaled_scores_0")]; + tensor attention_21_masked_scaled_scores_0 = add(x = attention_21_scaled_scores_0, y = transpose_0)[name = string("attention_21_masked_scaled_scores_0")]; + int32 softmax_42_axis_0 = const()[name = string("softmax_42_axis_0"), val = int32(-2)]; + tensor softmax_42 = softmax(axis = softmax_42_axis_0, x = attention_21_masked_scaled_scores_0)[name = string("softmax_42")]; + bool attention_21_attention_0_transpose_x_0 = const()[name = string("attention_21_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_21_attention_0_transpose_y_0 = const()[name = string("attention_21_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_21_attention_0 = matmul(transpose_x = attention_21_attention_0_transpose_x_0, transpose_y = attention_21_attention_0_transpose_y_0, x = softmax_42, y = attention_21_slice_value_cache_heads_0)[name = string("attention_21_attention_0")]; + bool attention_21_scores_1_transpose_y_0 = const()[name = string("attention_21_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_21_scores_1_transpose_x_0 = const()[name = string("attention_21_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_21_scores_1 = matmul(transpose_x = attention_21_scores_1_transpose_x_0, transpose_y = attention_21_scores_1_transpose_y_0, x = attention_21_key_cache_head_1, y = attention_21_q_splits_1)[name = string("attention_21_scores_1")]; + fp16 attention_21_scaled_scores_1_y_0 = const()[name = string("attention_21_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_21_scaled_scores_1 = mul(x = attention_21_scores_1, y = attention_21_scaled_scores_1_y_0)[name = string("attention_21_scaled_scores_1")]; + tensor attention_21_masked_scaled_scores_1 = add(x = attention_21_scaled_scores_1, y = transpose_0)[name = string("attention_21_masked_scaled_scores_1")]; + int32 softmax_43_axis_0 = const()[name = string("softmax_43_axis_0"), val = int32(-2)]; + tensor softmax_43 = softmax(axis = softmax_43_axis_0, x = attention_21_masked_scaled_scores_1)[name = string("softmax_43")]; + bool attention_21_attention_1_transpose_x_0 = const()[name = string("attention_21_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_21_attention_1_transpose_y_0 = const()[name = string("attention_21_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_21_attention_1 = matmul(transpose_x = attention_21_attention_1_transpose_x_0, transpose_y = attention_21_attention_1_transpose_y_0, x = softmax_43, y = attention_21_slice_value_cache_heads_1)[name = string("attention_21_attention_1")]; + int32 attention_21_concat_attention_all_heads_axis_0 = const()[name = string("attention_21_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_21_concat_attention_all_heads_interleave_0 = const()[name = string("attention_21_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_21_concat_attention_all_heads = concat(axis = attention_21_concat_attention_all_heads_axis_0, interleave = attention_21_concat_attention_all_heads_interleave_0, values = (attention_21_attention_0, attention_21_attention_1))[name = string("attention_21_concat_attention_all_heads")]; + tensor attention_21_channels_first_retransposed_perm_0 = const()[name = string("attention_21_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_21_reshaped_shape_0 = const()[name = string("attention_21_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_21_channels_first_retransposed = transpose(perm = attention_21_channels_first_retransposed_perm_0, x = attention_21_concat_attention_all_heads)[name = string("transpose_5")]; + tensor attention_21_reshaped = reshape(shape = attention_21_reshaped_shape_0, x = attention_21_channels_first_retransposed)[name = string("attention_21_reshaped")]; + tensor attention_21_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535967360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536569536))))[name = string("attention_21_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_101 = constexpr_blockwise_shift_scale(data = attention_21_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536598272))))[name = string("constexpr_blockwise_shift_scale_101")]; + tensor attention_21_outproj_strides_0 = const()[name = string("attention_21_outproj_strides_0"), val = tensor([1])]; + string attention_21_outproj_pad_type_0 = const()[name = string("attention_21_outproj_pad_type_0"), val = string("valid")]; + tensor attention_21_outproj_pad_0 = const()[name = string("attention_21_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_21_outproj_dilations_0 = const()[name = string("attention_21_outproj_dilations_0"), val = tensor([1])]; + int32 attention_21_outproj_groups_0 = const()[name = string("attention_21_outproj_groups_0"), val = int32(1)]; + tensor attention_21_outproj = conv(dilations = attention_21_outproj_dilations_0, groups = attention_21_outproj_groups_0, pad = attention_21_outproj_pad_0, pad_type = attention_21_outproj_pad_type_0, strides = attention_21_outproj_strides_0, weight = constexpr_blockwise_shift_scale_101, x = attention_21_reshaped)[name = string("attention_21_outproj")]; + tensor block_21_residual_1 = add(x = block_20_residual_2, y = attention_21_outproj)[name = string("block_21_residual_1")]; + tensor block_21_ffn_rmsnorm_abs = abs(x = block_21_residual_1)[name = string("block_21_ffn_rmsnorm_abs")]; + tensor block_21_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_21_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_21_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_21_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_21_ffn_rmsnorm_maxval = reduce_max(axes = block_21_ffn_rmsnorm_maxval_axes_0, keep_dims = block_21_ffn_rmsnorm_maxval_keep_dims_0, x = block_21_ffn_rmsnorm_abs)[name = string("block_21_ffn_rmsnorm_maxval")]; + fp16 block_21_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_21_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_21_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_21_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_21_ffn_rmsnorm_maxval_clipped = clip(alpha = block_21_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_21_ffn_rmsnorm_maxval_clipped_beta_0, x = block_21_ffn_rmsnorm_maxval)[name = string("block_21_ffn_rmsnorm_maxval_clipped")]; + tensor block_21_ffn_rmsnorm_scaled = real_div(x = block_21_residual_1, y = block_21_ffn_rmsnorm_maxval_clipped)[name = string("block_21_ffn_rmsnorm_scaled")]; + tensor block_21_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_21_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_21_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_21_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_21_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_21_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_21_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_21_ffn_rmsnorm_scaled)[name = string("block_21_ffn_rmsnorm_squared_sum")]; + fp16 block_21_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_21_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_21_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_21_ffn_rmsnorm_rsqrt_epsilon_0, x = block_21_ffn_rmsnorm_squared_sum)[name = string("block_21_ffn_rmsnorm_rsqrt")]; + fp16 block_21_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_21_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_21_ffn_rmsnorm_dim_scaled = mul(x = block_21_ffn_rmsnorm_scaled, y = block_21_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_21_ffn_rmsnorm_dim_scaled")]; + tensor block_21_ffn_rmsnorm_normalized = mul(x = block_21_ffn_rmsnorm_dim_scaled, y = block_21_ffn_rmsnorm_rsqrt)[name = string("block_21_ffn_rmsnorm_normalized")]; + tensor block_21_ffn_rmsnorm_y_0 = const()[name = string("block_21_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536600128)))]; + tensor block_21_ffn_rmsnorm = mul(x = block_21_ffn_rmsnorm_normalized, y = block_21_ffn_rmsnorm_y_0)[name = string("block_21_ffn_rmsnorm")]; + tensor block_21_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536601984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539870656))))[name = string("block_21_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_102 = constexpr_blockwise_shift_scale(data = block_21_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540026368))))[name = string("constexpr_blockwise_shift_scale_102")]; + tensor block_21_ffn_inproj_strides_0 = const()[name = string("block_21_ffn_inproj_strides_0"), val = tensor([1])]; + string block_21_ffn_inproj_pad_type_0 = const()[name = string("block_21_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_21_ffn_inproj_pad_0 = const()[name = string("block_21_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_21_ffn_inproj_dilations_0 = const()[name = string("block_21_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_21_ffn_inproj_groups_0 = const()[name = string("block_21_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_21_ffn_inproj = conv(dilations = block_21_ffn_inproj_dilations_0, groups = block_21_ffn_inproj_groups_0, pad = block_21_ffn_inproj_pad_0, pad_type = block_21_ffn_inproj_pad_type_0, strides = block_21_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_102, x = block_21_ffn_rmsnorm)[name = string("block_21_ffn_inproj")]; + tensor block_21_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540036160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543304832))))[name = string("block_21_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_103 = constexpr_blockwise_shift_scale(data = block_21_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543460544))))[name = string("constexpr_blockwise_shift_scale_103")]; + tensor block_21_ffn_g_strides_0 = const()[name = string("block_21_ffn_g_strides_0"), val = tensor([1])]; + string block_21_ffn_g_pad_type_0 = const()[name = string("block_21_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_21_ffn_g_pad_0 = const()[name = string("block_21_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_21_ffn_g_dilations_0 = const()[name = string("block_21_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_21_ffn_g_groups_0 = const()[name = string("block_21_ffn_g_groups_0"), val = int32(1)]; + tensor block_21_ffn_g = conv(dilations = block_21_ffn_g_dilations_0, groups = block_21_ffn_g_groups_0, pad = block_21_ffn_g_pad_0, pad_type = block_21_ffn_g_pad_type_0, strides = block_21_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_103, x = block_21_ffn_rmsnorm)[name = string("block_21_ffn_g")]; + tensor block_21_ffn_g_activation = silu(x = block_21_ffn_g)[name = string("block_21_ffn_g_activation")]; + tensor block_21_ffn_x_gated = mul(x = block_21_ffn_inproj, y = block_21_ffn_g_activation)[name = string("block_21_ffn_x_gated")]; + tensor block_21_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543470336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546739008))))[name = string("block_21_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_104 = constexpr_blockwise_shift_scale(data = block_21_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546767744))))[name = string("constexpr_blockwise_shift_scale_104")]; + tensor block_21_ffn_outproj_strides_0 = const()[name = string("block_21_ffn_outproj_strides_0"), val = tensor([1])]; + string block_21_ffn_outproj_pad_type_0 = const()[name = string("block_21_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_21_ffn_outproj_pad_0 = const()[name = string("block_21_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_21_ffn_outproj_dilations_0 = const()[name = string("block_21_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_21_ffn_outproj_groups_0 = const()[name = string("block_21_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_21_ffn_outproj = conv(dilations = block_21_ffn_outproj_dilations_0, groups = block_21_ffn_outproj_groups_0, pad = block_21_ffn_outproj_pad_0, pad_type = block_21_ffn_outproj_pad_type_0, strides = block_21_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_104, x = block_21_ffn_x_gated)[name = string("block_21_ffn_outproj")]; + tensor block_21_residual_2 = add(x = block_21_ffn_outproj, y = block_21_residual_1)[name = string("block_21_residual_2")]; + tensor block_22_attention_rmsnorm_abs = abs(x = block_21_residual_2)[name = string("block_22_attention_rmsnorm_abs")]; + tensor block_22_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_22_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_22_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_22_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_22_attention_rmsnorm_maxval = reduce_max(axes = block_22_attention_rmsnorm_maxval_axes_0, keep_dims = block_22_attention_rmsnorm_maxval_keep_dims_0, x = block_22_attention_rmsnorm_abs)[name = string("block_22_attention_rmsnorm_maxval")]; + fp16 block_22_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_22_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_22_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_22_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_22_attention_rmsnorm_maxval_clipped = clip(alpha = block_22_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_22_attention_rmsnorm_maxval_clipped_beta_0, x = block_22_attention_rmsnorm_maxval)[name = string("block_22_attention_rmsnorm_maxval_clipped")]; + tensor block_22_attention_rmsnorm_scaled = real_div(x = block_21_residual_2, y = block_22_attention_rmsnorm_maxval_clipped)[name = string("block_22_attention_rmsnorm_scaled")]; + tensor block_22_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_22_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_22_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_22_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_22_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_22_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_22_attention_rmsnorm_squared_sum_keep_dims_0, x = block_22_attention_rmsnorm_scaled)[name = string("block_22_attention_rmsnorm_squared_sum")]; + fp16 block_22_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_22_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_22_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_22_attention_rmsnorm_rsqrt_epsilon_0, x = block_22_attention_rmsnorm_squared_sum)[name = string("block_22_attention_rmsnorm_rsqrt")]; + fp16 block_22_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_22_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_22_attention_rmsnorm_dim_scaled = mul(x = block_22_attention_rmsnorm_scaled, y = block_22_attention_rmsnorm_dim_scaled_y_0)[name = string("block_22_attention_rmsnorm_dim_scaled")]; + tensor block_22_attention_rmsnorm_normalized = mul(x = block_22_attention_rmsnorm_dim_scaled, y = block_22_attention_rmsnorm_rsqrt)[name = string("block_22_attention_rmsnorm_normalized")]; + tensor block_22_attention_rmsnorm_y_0 = const()[name = string("block_22_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546769600)))]; + tensor block_22_attention_rmsnorm = mul(x = block_22_attention_rmsnorm_normalized, y = block_22_attention_rmsnorm_y_0)[name = string("block_22_attention_rmsnorm")]; + tensor attention_22_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546771456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547545664))))[name = string("attention_22_qkvproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_105 = constexpr_blockwise_shift_scale(data = attention_22_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547582592))))[name = string("constexpr_blockwise_shift_scale_105")]; + tensor attention_22_qkvproj_bias_0 = const()[name = string("attention_22_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547584960)))]; + tensor attention_22_qkvproj_strides_0 = const()[name = string("attention_22_qkvproj_strides_0"), val = tensor([1])]; + string attention_22_qkvproj_pad_type_0 = const()[name = string("attention_22_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_22_qkvproj_pad_0 = const()[name = string("attention_22_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_22_qkvproj_dilations_0 = const()[name = string("attention_22_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_22_qkvproj_groups_0 = const()[name = string("attention_22_qkvproj_groups_0"), val = int32(1)]; + tensor attention_22_qkvproj = conv(bias = attention_22_qkvproj_bias_0, dilations = attention_22_qkvproj_dilations_0, groups = attention_22_qkvproj_groups_0, pad = attention_22_qkvproj_pad_0, pad_type = attention_22_qkvproj_pad_type_0, strides = attention_22_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_105, x = block_22_attention_rmsnorm)[name = string("attention_22_qkvproj")]; + tensor attention_22_head_reshape_shape_0 = const()[name = string("attention_22_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_22_head_reshape = reshape(shape = attention_22_head_reshape_shape_0, x = attention_22_qkvproj)[name = string("attention_22_head_reshape")]; + tensor attention_22_head_transpose_perm_0 = const()[name = string("attention_22_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_22_split_qkv_heads_axis_0 = const()[name = string("attention_22_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_22_split_qkv_heads_split_sizes_0 = const()[name = string("attention_22_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_22_head_transpose = transpose(perm = attention_22_head_transpose_perm_0, x = attention_22_head_reshape)[name = string("transpose_4")]; + tensor attention_22_split_qkv_heads_0, tensor attention_22_split_qkv_heads_1, tensor attention_22_split_qkv_heads_2 = split(axis = attention_22_split_qkv_heads_axis_0, split_sizes = attention_22_split_qkv_heads_split_sizes_0, x = attention_22_head_transpose)[name = string("attention_22_split_qkv_heads")]; + tensor attention_22_q_rope_lhs_mult = mul(x = attention_22_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_22_q_rope_lhs_mult")]; + int32 attention_22_q_rotate_half_split_num_splits_0 = const()[name = string("attention_22_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_22_q_rotate_half_split_axis_0 = const()[name = string("attention_22_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_22_q_rotate_half_split_0, tensor attention_22_q_rotate_half_split_1 = split(axis = attention_22_q_rotate_half_split_axis_0, num_splits = attention_22_q_rotate_half_split_num_splits_0, x = attention_22_split_qkv_heads_0)[name = string("attention_22_q_rotate_half_split")]; + fp16 attention_22_q_rotate_half_neg_y_0 = const()[name = string("attention_22_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_22_q_rotate_half_neg = mul(x = attention_22_q_rotate_half_split_1, y = attention_22_q_rotate_half_neg_y_0)[name = string("attention_22_q_rotate_half_neg")]; + int32 attention_22_q_rotate_half_concat_axis_0 = const()[name = string("attention_22_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_22_q_rotate_half_concat_interleave_0 = const()[name = string("attention_22_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_22_q_rotate_half_concat = concat(axis = attention_22_q_rotate_half_concat_axis_0, interleave = attention_22_q_rotate_half_concat_interleave_0, values = (attention_22_q_rotate_half_neg, attention_22_q_rotate_half_split_0))[name = string("attention_22_q_rotate_half_concat")]; + tensor attention_22_q_rope_rhs_mult = mul(x = attention_22_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_22_q_rope_rhs_mult")]; + tensor attention_22_q_rope = add(x = attention_22_q_rope_lhs_mult, y = attention_22_q_rope_rhs_mult)[name = string("attention_22_q_rope")]; + tensor attention_22_k_rope_lhs_mult = mul(x = attention_22_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_22_k_rope_lhs_mult")]; + int32 attention_22_k_rotate_half_split_num_splits_0 = const()[name = string("attention_22_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_22_k_rotate_half_split_axis_0 = const()[name = string("attention_22_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_22_k_rotate_half_split_0, tensor attention_22_k_rotate_half_split_1 = split(axis = attention_22_k_rotate_half_split_axis_0, num_splits = attention_22_k_rotate_half_split_num_splits_0, x = attention_22_split_qkv_heads_1)[name = string("attention_22_k_rotate_half_split")]; + fp16 attention_22_k_rotate_half_neg_y_0 = const()[name = string("attention_22_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_22_k_rotate_half_neg = mul(x = attention_22_k_rotate_half_split_1, y = attention_22_k_rotate_half_neg_y_0)[name = string("attention_22_k_rotate_half_neg")]; + int32 attention_22_k_rotate_half_concat_axis_0 = const()[name = string("attention_22_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_22_k_rotate_half_concat_interleave_0 = const()[name = string("attention_22_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_22_k_rotate_half_concat = concat(axis = attention_22_k_rotate_half_concat_axis_0, interleave = attention_22_k_rotate_half_concat_interleave_0, values = (attention_22_k_rotate_half_neg, attention_22_k_rotate_half_split_0))[name = string("attention_22_k_rotate_half_concat")]; + tensor attention_22_k_rope_rhs_mult = mul(x = attention_22_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_22_k_rope_rhs_mult")]; + tensor attention_22_k_rope = add(x = attention_22_k_rope_lhs_mult, y = attention_22_k_rope_rhs_mult)[name = string("attention_22_k_rope")]; + int32 attention_22_q_splits_axis_0 = const()[name = string("attention_22_q_splits_axis_0"), val = int32(1)]; + int32 attention_22_q_splits_num_splits_0 = const()[name = string("attention_22_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_22_q_splits_0, tensor attention_22_q_splits_1 = split(axis = attention_22_q_splits_axis_0, num_splits = attention_22_q_splits_num_splits_0, x = attention_22_q_rope)[name = string("attention_22_q_splits")]; + tensor attention_22_update_begin_0_values0_0 = const()[name = string("attention_22_update_begin_0_values0_0"), val = tensor([22])]; + tensor attention_22_update_begin_0_values1_0 = const()[name = string("attention_22_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_22_update_begin_0_values3_0 = const()[name = string("attention_22_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_22_update_begin_0_axis_0 = const()[name = string("attention_22_update_begin_0_axis_0"), val = int32(0)]; + bool attention_22_update_begin_0_interleave_0 = const()[name = string("attention_22_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_22_update_begin_0 = concat(axis = attention_22_update_begin_0_axis_0, interleave = attention_22_update_begin_0_interleave_0, values = (attention_22_update_begin_0_values0_0, attention_22_update_begin_0_values1_0, query_pos1, attention_22_update_begin_0_values3_0))[name = string("attention_22_update_begin_0")]; + tensor attention_22_update_end_0_values0_0 = const()[name = string("attention_22_update_end_0_values0_0"), val = tensor([23])]; + tensor attention_22_update_end_0_values1_0 = const()[name = string("attention_22_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_22_update_end_0_values3_0 = const()[name = string("attention_22_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_22_update_end_0_axis_0 = const()[name = string("attention_22_update_end_0_axis_0"), val = int32(0)]; + bool attention_22_update_end_0_interleave_0 = const()[name = string("attention_22_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_22_update_end_0 = concat(axis = attention_22_update_end_0_axis_0, interleave = attention_22_update_end_0_interleave_0, values = (attention_22_update_end_0_values0_0, attention_22_update_end_0_values1_0, end_pos_0, attention_22_update_end_0_values3_0))[name = string("attention_22_update_end_0")]; + tensor attention_22_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_22_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_22_updated_key_cache_0 = slice_update(begin = attention_22_update_begin_0, end = attention_22_update_end_0, squeeze_mask = attention_22_updated_key_cache_0_squeeze_mask_0, update = attention_22_k_rope, x = coreml_update_state_42)[name = string("attention_22_updated_key_cache_0")]; + write_state(data = attention_22_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_92_write_state")]; + tensor coreml_update_state_44 = read_state(input = key_cache_state)[name = string("coreml_update_state_92")]; + tensor attention_22_key_cache_begin_0 = const()[name = string("attention_22_key_cache_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor attention_22_key_cache_end_0 = const()[name = string("attention_22_key_cache_end_0"), val = tensor([23, 2, 512, 64])]; + tensor attention_22_key_cache_squeeze_mask_0 = const()[name = string("attention_22_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_22_key_cache = slice_by_index(begin = attention_22_key_cache_begin_0, end = attention_22_key_cache_end_0, squeeze_mask = attention_22_key_cache_squeeze_mask_0, x = coreml_update_state_44)[name = string("attention_22_key_cache")]; + int32 attention_22_key_cache_head_axis_0 = const()[name = string("attention_22_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_22_key_cache_head_num_splits_0 = const()[name = string("attention_22_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_22_key_cache_head_0, tensor attention_22_key_cache_head_1 = split(axis = attention_22_key_cache_head_axis_0, num_splits = attention_22_key_cache_head_num_splits_0, x = attention_22_key_cache)[name = string("attention_22_key_cache_head")]; + tensor attention_22_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_22_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_22_updated_value_cache_0 = slice_update(begin = attention_22_update_begin_0, end = attention_22_update_end_0, squeeze_mask = attention_22_updated_value_cache_0_squeeze_mask_0, update = attention_22_split_qkv_heads_2, x = coreml_update_state_43)[name = string("attention_22_updated_value_cache_0")]; + write_state(data = attention_22_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_93_write_state")]; + tensor coreml_update_state_45 = read_state(input = value_cache_state)[name = string("coreml_update_state_93")]; + tensor attention_22_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_22_slice_current_layer_value_cache_begin_0"), val = tensor([22, 0, 0, 0])]; + tensor attention_22_slice_current_layer_value_cache_end_0 = const()[name = string("attention_22_slice_current_layer_value_cache_end_0"), val = tensor([23, 2, 512, 64])]; + tensor attention_22_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_22_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_22_slice_current_layer_value_cache = slice_by_index(begin = attention_22_slice_current_layer_value_cache_begin_0, end = attention_22_slice_current_layer_value_cache_end_0, squeeze_mask = attention_22_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_45)[name = string("attention_22_slice_current_layer_value_cache")]; + int32 attention_22_slice_value_cache_heads_axis_0 = const()[name = string("attention_22_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_22_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_22_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_22_slice_value_cache_heads_0, tensor attention_22_slice_value_cache_heads_1 = split(axis = attention_22_slice_value_cache_heads_axis_0, num_splits = attention_22_slice_value_cache_heads_num_splits_0, x = attention_22_slice_current_layer_value_cache)[name = string("attention_22_slice_value_cache_heads")]; + bool attention_22_scores_0_transpose_y_0 = const()[name = string("attention_22_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_22_scores_0_transpose_x_0 = const()[name = string("attention_22_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_22_scores_0 = matmul(transpose_x = attention_22_scores_0_transpose_x_0, transpose_y = attention_22_scores_0_transpose_y_0, x = attention_22_key_cache_head_0, y = attention_22_q_splits_0)[name = string("attention_22_scores_0")]; + fp16 attention_22_scaled_scores_0_y_0 = const()[name = string("attention_22_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_22_scaled_scores_0 = mul(x = attention_22_scores_0, y = attention_22_scaled_scores_0_y_0)[name = string("attention_22_scaled_scores_0")]; + tensor attention_22_masked_scaled_scores_0 = add(x = attention_22_scaled_scores_0, y = transpose_0)[name = string("attention_22_masked_scaled_scores_0")]; + int32 softmax_44_axis_0 = const()[name = string("softmax_44_axis_0"), val = int32(-2)]; + tensor softmax_44 = softmax(axis = softmax_44_axis_0, x = attention_22_masked_scaled_scores_0)[name = string("softmax_44")]; + bool attention_22_attention_0_transpose_x_0 = const()[name = string("attention_22_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_22_attention_0_transpose_y_0 = const()[name = string("attention_22_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_22_attention_0 = matmul(transpose_x = attention_22_attention_0_transpose_x_0, transpose_y = attention_22_attention_0_transpose_y_0, x = softmax_44, y = attention_22_slice_value_cache_heads_0)[name = string("attention_22_attention_0")]; + bool attention_22_scores_1_transpose_y_0 = const()[name = string("attention_22_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_22_scores_1_transpose_x_0 = const()[name = string("attention_22_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_22_scores_1 = matmul(transpose_x = attention_22_scores_1_transpose_x_0, transpose_y = attention_22_scores_1_transpose_y_0, x = attention_22_key_cache_head_1, y = attention_22_q_splits_1)[name = string("attention_22_scores_1")]; + fp16 attention_22_scaled_scores_1_y_0 = const()[name = string("attention_22_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_22_scaled_scores_1 = mul(x = attention_22_scores_1, y = attention_22_scaled_scores_1_y_0)[name = string("attention_22_scaled_scores_1")]; + tensor attention_22_masked_scaled_scores_1 = add(x = attention_22_scaled_scores_1, y = transpose_0)[name = string("attention_22_masked_scaled_scores_1")]; + int32 softmax_45_axis_0 = const()[name = string("softmax_45_axis_0"), val = int32(-2)]; + tensor softmax_45 = softmax(axis = softmax_45_axis_0, x = attention_22_masked_scaled_scores_1)[name = string("softmax_45")]; + bool attention_22_attention_1_transpose_x_0 = const()[name = string("attention_22_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_22_attention_1_transpose_y_0 = const()[name = string("attention_22_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_22_attention_1 = matmul(transpose_x = attention_22_attention_1_transpose_x_0, transpose_y = attention_22_attention_1_transpose_y_0, x = softmax_45, y = attention_22_slice_value_cache_heads_1)[name = string("attention_22_attention_1")]; + int32 attention_22_concat_attention_all_heads_axis_0 = const()[name = string("attention_22_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_22_concat_attention_all_heads_interleave_0 = const()[name = string("attention_22_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_22_concat_attention_all_heads = concat(axis = attention_22_concat_attention_all_heads_axis_0, interleave = attention_22_concat_attention_all_heads_interleave_0, values = (attention_22_attention_0, attention_22_attention_1))[name = string("attention_22_concat_attention_all_heads")]; + tensor attention_22_channels_first_retransposed_perm_0 = const()[name = string("attention_22_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_22_reshaped_shape_0 = const()[name = string("attention_22_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_22_channels_first_retransposed = transpose(perm = attention_22_channels_first_retransposed_perm_0, x = attention_22_concat_attention_all_heads)[name = string("transpose_3")]; + tensor attention_22_reshaped = reshape(shape = attention_22_reshaped_shape_0, x = attention_22_channels_first_retransposed)[name = string("attention_22_reshaped")]; + tensor attention_22_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547587328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548189504))))[name = string("attention_22_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_106 = constexpr_blockwise_shift_scale(data = attention_22_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548218240))))[name = string("constexpr_blockwise_shift_scale_106")]; + tensor attention_22_outproj_strides_0 = const()[name = string("attention_22_outproj_strides_0"), val = tensor([1])]; + string attention_22_outproj_pad_type_0 = const()[name = string("attention_22_outproj_pad_type_0"), val = string("valid")]; + tensor attention_22_outproj_pad_0 = const()[name = string("attention_22_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_22_outproj_dilations_0 = const()[name = string("attention_22_outproj_dilations_0"), val = tensor([1])]; + int32 attention_22_outproj_groups_0 = const()[name = string("attention_22_outproj_groups_0"), val = int32(1)]; + tensor attention_22_outproj = conv(dilations = attention_22_outproj_dilations_0, groups = attention_22_outproj_groups_0, pad = attention_22_outproj_pad_0, pad_type = attention_22_outproj_pad_type_0, strides = attention_22_outproj_strides_0, weight = constexpr_blockwise_shift_scale_106, x = attention_22_reshaped)[name = string("attention_22_outproj")]; + tensor block_22_residual_1 = add(x = block_21_residual_2, y = attention_22_outproj)[name = string("block_22_residual_1")]; + tensor block_22_ffn_rmsnorm_abs = abs(x = block_22_residual_1)[name = string("block_22_ffn_rmsnorm_abs")]; + tensor block_22_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_22_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_22_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_22_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_22_ffn_rmsnorm_maxval = reduce_max(axes = block_22_ffn_rmsnorm_maxval_axes_0, keep_dims = block_22_ffn_rmsnorm_maxval_keep_dims_0, x = block_22_ffn_rmsnorm_abs)[name = string("block_22_ffn_rmsnorm_maxval")]; + fp16 block_22_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_22_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_22_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_22_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_22_ffn_rmsnorm_maxval_clipped = clip(alpha = block_22_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_22_ffn_rmsnorm_maxval_clipped_beta_0, x = block_22_ffn_rmsnorm_maxval)[name = string("block_22_ffn_rmsnorm_maxval_clipped")]; + tensor block_22_ffn_rmsnorm_scaled = real_div(x = block_22_residual_1, y = block_22_ffn_rmsnorm_maxval_clipped)[name = string("block_22_ffn_rmsnorm_scaled")]; + tensor block_22_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_22_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_22_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_22_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_22_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_22_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_22_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_22_ffn_rmsnorm_scaled)[name = string("block_22_ffn_rmsnorm_squared_sum")]; + fp16 block_22_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_22_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_22_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_22_ffn_rmsnorm_rsqrt_epsilon_0, x = block_22_ffn_rmsnorm_squared_sum)[name = string("block_22_ffn_rmsnorm_rsqrt")]; + fp16 block_22_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_22_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_22_ffn_rmsnorm_dim_scaled = mul(x = block_22_ffn_rmsnorm_scaled, y = block_22_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_22_ffn_rmsnorm_dim_scaled")]; + tensor block_22_ffn_rmsnorm_normalized = mul(x = block_22_ffn_rmsnorm_dim_scaled, y = block_22_ffn_rmsnorm_rsqrt)[name = string("block_22_ffn_rmsnorm_normalized")]; + tensor block_22_ffn_rmsnorm_y_0 = const()[name = string("block_22_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548220096)))]; + tensor block_22_ffn_rmsnorm = mul(x = block_22_ffn_rmsnorm_normalized, y = block_22_ffn_rmsnorm_y_0)[name = string("block_22_ffn_rmsnorm")]; + tensor block_22_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548221952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551490624))))[name = string("block_22_ffn_inproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_107 = constexpr_blockwise_shift_scale(data = block_22_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551646336))))[name = string("constexpr_blockwise_shift_scale_107")]; + tensor block_22_ffn_inproj_strides_0 = const()[name = string("block_22_ffn_inproj_strides_0"), val = tensor([1])]; + string block_22_ffn_inproj_pad_type_0 = const()[name = string("block_22_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_22_ffn_inproj_pad_0 = const()[name = string("block_22_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_22_ffn_inproj_dilations_0 = const()[name = string("block_22_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_22_ffn_inproj_groups_0 = const()[name = string("block_22_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_22_ffn_inproj = conv(dilations = block_22_ffn_inproj_dilations_0, groups = block_22_ffn_inproj_groups_0, pad = block_22_ffn_inproj_pad_0, pad_type = block_22_ffn_inproj_pad_type_0, strides = block_22_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_107, x = block_22_ffn_rmsnorm)[name = string("block_22_ffn_inproj")]; + tensor block_22_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551656128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554924800))))[name = string("block_22_ffn_g_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_108 = constexpr_blockwise_shift_scale(data = block_22_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555080512))))[name = string("constexpr_blockwise_shift_scale_108")]; + tensor block_22_ffn_g_strides_0 = const()[name = string("block_22_ffn_g_strides_0"), val = tensor([1])]; + string block_22_ffn_g_pad_type_0 = const()[name = string("block_22_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_22_ffn_g_pad_0 = const()[name = string("block_22_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_22_ffn_g_dilations_0 = const()[name = string("block_22_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_22_ffn_g_groups_0 = const()[name = string("block_22_ffn_g_groups_0"), val = int32(1)]; + tensor block_22_ffn_g = conv(dilations = block_22_ffn_g_dilations_0, groups = block_22_ffn_g_groups_0, pad = block_22_ffn_g_pad_0, pad_type = block_22_ffn_g_pad_type_0, strides = block_22_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_108, x = block_22_ffn_rmsnorm)[name = string("block_22_ffn_g")]; + tensor block_22_ffn_g_activation = silu(x = block_22_ffn_g)[name = string("block_22_ffn_g_activation")]; + tensor block_22_ffn_x_gated = mul(x = block_22_ffn_inproj, y = block_22_ffn_g_activation)[name = string("block_22_ffn_x_gated")]; + tensor block_22_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555090304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558358976))))[name = string("block_22_ffn_outproj_weight_dequantization")]; + tensor constexpr_blockwise_shift_scale_109 = constexpr_blockwise_shift_scale(data = block_22_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558387712))))[name = string("constexpr_blockwise_shift_scale_109")]; + tensor block_22_ffn_outproj_strides_0 = const()[name = string("block_22_ffn_outproj_strides_0"), val = tensor([1])]; + string block_22_ffn_outproj_pad_type_0 = const()[name = string("block_22_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_22_ffn_outproj_pad_0 = const()[name = string("block_22_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_22_ffn_outproj_dilations_0 = const()[name = string("block_22_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_22_ffn_outproj_groups_0 = const()[name = string("block_22_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_22_ffn_outproj = conv(dilations = block_22_ffn_outproj_dilations_0, groups = block_22_ffn_outproj_groups_0, pad = block_22_ffn_outproj_pad_0, pad_type = block_22_ffn_outproj_pad_type_0, strides = block_22_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_109, x = block_22_ffn_x_gated)[name = string("block_22_ffn_outproj")]; + tensor block_22_residual_2 = add(x = block_22_ffn_outproj, y = block_22_residual_1)[name = string("block_22_residual_2")]; + tensor block_23_attention_rmsnorm_abs = abs(x = block_22_residual_2)[name = string("block_23_attention_rmsnorm_abs")]; + tensor block_23_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_23_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_23_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_23_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_23_attention_rmsnorm_maxval = reduce_max(axes = block_23_attention_rmsnorm_maxval_axes_0, keep_dims = block_23_attention_rmsnorm_maxval_keep_dims_0, x = block_23_attention_rmsnorm_abs)[name = string("block_23_attention_rmsnorm_maxval")]; + fp16 block_23_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_23_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_23_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_23_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_23_attention_rmsnorm_maxval_clipped = clip(alpha = block_23_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_23_attention_rmsnorm_maxval_clipped_beta_0, x = block_23_attention_rmsnorm_maxval)[name = string("block_23_attention_rmsnorm_maxval_clipped")]; + tensor block_23_attention_rmsnorm_scaled = real_div(x = block_22_residual_2, y = block_23_attention_rmsnorm_maxval_clipped)[name = string("block_23_attention_rmsnorm_scaled")]; + tensor block_23_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_23_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_23_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_23_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_23_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_23_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_23_attention_rmsnorm_squared_sum_keep_dims_0, x = block_23_attention_rmsnorm_scaled)[name = string("block_23_attention_rmsnorm_squared_sum")]; + fp16 block_23_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_23_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_23_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_23_attention_rmsnorm_rsqrt_epsilon_0, x = block_23_attention_rmsnorm_squared_sum)[name = string("block_23_attention_rmsnorm_rsqrt")]; + fp16 block_23_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_23_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_23_attention_rmsnorm_dim_scaled = mul(x = block_23_attention_rmsnorm_scaled, y = block_23_attention_rmsnorm_dim_scaled_y_0)[name = string("block_23_attention_rmsnorm_dim_scaled")]; + tensor block_23_attention_rmsnorm_normalized = mul(x = block_23_attention_rmsnorm_dim_scaled, y = block_23_attention_rmsnorm_rsqrt)[name = string("block_23_attention_rmsnorm_normalized")]; + tensor block_23_attention_rmsnorm_y_0 = const()[name = string("block_23_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558389568)))]; + tensor block_23_attention_rmsnorm = mul(x = block_23_attention_rmsnorm_normalized, y = block_23_attention_rmsnorm_y_0)[name = string("block_23_attention_rmsnorm")]; + tensor attention_23_qkvproj_weight_0 = const()[name = string("attention_23_qkvproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558391424)))]; + tensor attention_23_qkvproj_bias_0 = const()[name = string("attention_23_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560455872)))]; + tensor attention_23_qkvproj_strides_0 = const()[name = string("attention_23_qkvproj_strides_0"), val = tensor([1])]; + string attention_23_qkvproj_pad_type_0 = const()[name = string("attention_23_qkvproj_pad_type_0"), val = string("valid")]; + tensor attention_23_qkvproj_pad_0 = const()[name = string("attention_23_qkvproj_pad_0"), val = tensor([0, 0])]; + tensor attention_23_qkvproj_dilations_0 = const()[name = string("attention_23_qkvproj_dilations_0"), val = tensor([1])]; + int32 attention_23_qkvproj_groups_0 = const()[name = string("attention_23_qkvproj_groups_0"), val = int32(1)]; + tensor attention_23_qkvproj = conv(bias = attention_23_qkvproj_bias_0, dilations = attention_23_qkvproj_dilations_0, groups = attention_23_qkvproj_groups_0, pad = attention_23_qkvproj_pad_0, pad_type = attention_23_qkvproj_pad_type_0, strides = attention_23_qkvproj_strides_0, weight = attention_23_qkvproj_weight_0, x = block_23_attention_rmsnorm)[name = string("attention_23_qkvproj")]; + tensor attention_23_head_reshape_shape_0 = const()[name = string("attention_23_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; + tensor attention_23_head_reshape = reshape(shape = attention_23_head_reshape_shape_0, x = attention_23_qkvproj)[name = string("attention_23_head_reshape")]; + tensor attention_23_head_transpose_perm_0 = const()[name = string("attention_23_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; + int32 attention_23_split_qkv_heads_axis_0 = const()[name = string("attention_23_split_qkv_heads_axis_0"), val = int32(1)]; + tensor attention_23_split_qkv_heads_split_sizes_0 = const()[name = string("attention_23_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; + tensor attention_23_head_transpose = transpose(perm = attention_23_head_transpose_perm_0, x = attention_23_head_reshape)[name = string("transpose_2")]; + tensor attention_23_split_qkv_heads_0, tensor attention_23_split_qkv_heads_1, tensor attention_23_split_qkv_heads_2 = split(axis = attention_23_split_qkv_heads_axis_0, split_sizes = attention_23_split_qkv_heads_split_sizes_0, x = attention_23_head_transpose)[name = string("attention_23_split_qkv_heads")]; + tensor attention_23_q_rope_lhs_mult = mul(x = attention_23_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_23_q_rope_lhs_mult")]; + int32 attention_23_q_rotate_half_split_num_splits_0 = const()[name = string("attention_23_q_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_23_q_rotate_half_split_axis_0 = const()[name = string("attention_23_q_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_23_q_rotate_half_split_0, tensor attention_23_q_rotate_half_split_1 = split(axis = attention_23_q_rotate_half_split_axis_0, num_splits = attention_23_q_rotate_half_split_num_splits_0, x = attention_23_split_qkv_heads_0)[name = string("attention_23_q_rotate_half_split")]; + fp16 attention_23_q_rotate_half_neg_y_0 = const()[name = string("attention_23_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_23_q_rotate_half_neg = mul(x = attention_23_q_rotate_half_split_1, y = attention_23_q_rotate_half_neg_y_0)[name = string("attention_23_q_rotate_half_neg")]; + int32 attention_23_q_rotate_half_concat_axis_0 = const()[name = string("attention_23_q_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_23_q_rotate_half_concat_interleave_0 = const()[name = string("attention_23_q_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_23_q_rotate_half_concat = concat(axis = attention_23_q_rotate_half_concat_axis_0, interleave = attention_23_q_rotate_half_concat_interleave_0, values = (attention_23_q_rotate_half_neg, attention_23_q_rotate_half_split_0))[name = string("attention_23_q_rotate_half_concat")]; + tensor attention_23_q_rope_rhs_mult = mul(x = attention_23_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_23_q_rope_rhs_mult")]; + tensor attention_23_q_rope = add(x = attention_23_q_rope_lhs_mult, y = attention_23_q_rope_rhs_mult)[name = string("attention_23_q_rope")]; + tensor attention_23_k_rope_lhs_mult = mul(x = attention_23_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_23_k_rope_lhs_mult")]; + int32 attention_23_k_rotate_half_split_num_splits_0 = const()[name = string("attention_23_k_rotate_half_split_num_splits_0"), val = int32(2)]; + int32 attention_23_k_rotate_half_split_axis_0 = const()[name = string("attention_23_k_rotate_half_split_axis_0"), val = int32(3)]; + tensor attention_23_k_rotate_half_split_0, tensor attention_23_k_rotate_half_split_1 = split(axis = attention_23_k_rotate_half_split_axis_0, num_splits = attention_23_k_rotate_half_split_num_splits_0, x = attention_23_split_qkv_heads_1)[name = string("attention_23_k_rotate_half_split")]; + fp16 attention_23_k_rotate_half_neg_y_0 = const()[name = string("attention_23_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; + tensor attention_23_k_rotate_half_neg = mul(x = attention_23_k_rotate_half_split_1, y = attention_23_k_rotate_half_neg_y_0)[name = string("attention_23_k_rotate_half_neg")]; + int32 attention_23_k_rotate_half_concat_axis_0 = const()[name = string("attention_23_k_rotate_half_concat_axis_0"), val = int32(3)]; + bool attention_23_k_rotate_half_concat_interleave_0 = const()[name = string("attention_23_k_rotate_half_concat_interleave_0"), val = bool(false)]; + tensor attention_23_k_rotate_half_concat = concat(axis = attention_23_k_rotate_half_concat_axis_0, interleave = attention_23_k_rotate_half_concat_interleave_0, values = (attention_23_k_rotate_half_neg, attention_23_k_rotate_half_split_0))[name = string("attention_23_k_rotate_half_concat")]; + tensor attention_23_k_rope_rhs_mult = mul(x = attention_23_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_23_k_rope_rhs_mult")]; + tensor attention_23_k_rope = add(x = attention_23_k_rope_lhs_mult, y = attention_23_k_rope_rhs_mult)[name = string("attention_23_k_rope")]; + int32 attention_23_q_splits_axis_0 = const()[name = string("attention_23_q_splits_axis_0"), val = int32(1)]; + int32 attention_23_q_splits_num_splits_0 = const()[name = string("attention_23_q_splits_num_splits_0"), val = int32(2)]; + tensor attention_23_q_splits_0, tensor attention_23_q_splits_1 = split(axis = attention_23_q_splits_axis_0, num_splits = attention_23_q_splits_num_splits_0, x = attention_23_q_rope)[name = string("attention_23_q_splits")]; + tensor attention_23_update_begin_0_values0_0 = const()[name = string("attention_23_update_begin_0_values0_0"), val = tensor([23])]; + tensor attention_23_update_begin_0_values1_0 = const()[name = string("attention_23_update_begin_0_values1_0"), val = tensor([0])]; + tensor attention_23_update_begin_0_values3_0 = const()[name = string("attention_23_update_begin_0_values3_0"), val = tensor([0])]; + int32 attention_23_update_begin_0_axis_0 = const()[name = string("attention_23_update_begin_0_axis_0"), val = int32(0)]; + bool attention_23_update_begin_0_interleave_0 = const()[name = string("attention_23_update_begin_0_interleave_0"), val = bool(false)]; + tensor attention_23_update_begin_0 = concat(axis = attention_23_update_begin_0_axis_0, interleave = attention_23_update_begin_0_interleave_0, values = (attention_23_update_begin_0_values0_0, attention_23_update_begin_0_values1_0, query_pos1, attention_23_update_begin_0_values3_0))[name = string("attention_23_update_begin_0")]; + tensor attention_23_update_end_0_values0_0 = const()[name = string("attention_23_update_end_0_values0_0"), val = tensor([24])]; + tensor attention_23_update_end_0_values1_0 = const()[name = string("attention_23_update_end_0_values1_0"), val = tensor([2])]; + tensor attention_23_update_end_0_values3_0 = const()[name = string("attention_23_update_end_0_values3_0"), val = tensor([64])]; + int32 attention_23_update_end_0_axis_0 = const()[name = string("attention_23_update_end_0_axis_0"), val = int32(0)]; + bool attention_23_update_end_0_interleave_0 = const()[name = string("attention_23_update_end_0_interleave_0"), val = bool(false)]; + tensor attention_23_update_end_0 = concat(axis = attention_23_update_end_0_axis_0, interleave = attention_23_update_end_0_interleave_0, values = (attention_23_update_end_0_values0_0, attention_23_update_end_0_values1_0, end_pos_0, attention_23_update_end_0_values3_0))[name = string("attention_23_update_end_0")]; + tensor attention_23_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_23_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_23_updated_key_cache_0 = slice_update(begin = attention_23_update_begin_0, end = attention_23_update_end_0, squeeze_mask = attention_23_updated_key_cache_0_squeeze_mask_0, update = attention_23_k_rope, x = coreml_update_state_44)[name = string("attention_23_updated_key_cache_0")]; + write_state(data = attention_23_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_94_write_state")]; + tensor coreml_update_state_46 = read_state(input = key_cache_state)[name = string("coreml_update_state_94")]; + tensor attention_23_key_cache_begin_0 = const()[name = string("attention_23_key_cache_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor attention_23_key_cache_end_0 = const()[name = string("attention_23_key_cache_end_0"), val = tensor([24, 2, 512, 64])]; + tensor attention_23_key_cache_squeeze_mask_0 = const()[name = string("attention_23_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_23_key_cache = slice_by_index(begin = attention_23_key_cache_begin_0, end = attention_23_key_cache_end_0, squeeze_mask = attention_23_key_cache_squeeze_mask_0, x = coreml_update_state_46)[name = string("attention_23_key_cache")]; + int32 attention_23_key_cache_head_axis_0 = const()[name = string("attention_23_key_cache_head_axis_0"), val = int32(1)]; + int32 attention_23_key_cache_head_num_splits_0 = const()[name = string("attention_23_key_cache_head_num_splits_0"), val = int32(2)]; + tensor attention_23_key_cache_head_0, tensor attention_23_key_cache_head_1 = split(axis = attention_23_key_cache_head_axis_0, num_splits = attention_23_key_cache_head_num_splits_0, x = attention_23_key_cache)[name = string("attention_23_key_cache_head")]; + tensor attention_23_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_23_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_23_updated_value_cache_0 = slice_update(begin = attention_23_update_begin_0, end = attention_23_update_end_0, squeeze_mask = attention_23_updated_value_cache_0_squeeze_mask_0, update = attention_23_split_qkv_heads_2, x = coreml_update_state_45)[name = string("attention_23_updated_value_cache_0")]; + write_state(data = attention_23_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_95_write_state")]; + tensor coreml_update_state_47 = read_state(input = value_cache_state)[name = string("coreml_update_state_95")]; + tensor attention_23_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_23_slice_current_layer_value_cache_begin_0"), val = tensor([23, 0, 0, 0])]; + tensor attention_23_slice_current_layer_value_cache_end_0 = const()[name = string("attention_23_slice_current_layer_value_cache_end_0"), val = tensor([24, 2, 512, 64])]; + tensor attention_23_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_23_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; + tensor attention_23_slice_current_layer_value_cache = slice_by_index(begin = attention_23_slice_current_layer_value_cache_begin_0, end = attention_23_slice_current_layer_value_cache_end_0, squeeze_mask = attention_23_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_47)[name = string("attention_23_slice_current_layer_value_cache")]; + int32 attention_23_slice_value_cache_heads_axis_0 = const()[name = string("attention_23_slice_value_cache_heads_axis_0"), val = int32(1)]; + int32 attention_23_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_23_slice_value_cache_heads_num_splits_0"), val = int32(2)]; + tensor attention_23_slice_value_cache_heads_0, tensor attention_23_slice_value_cache_heads_1 = split(axis = attention_23_slice_value_cache_heads_axis_0, num_splits = attention_23_slice_value_cache_heads_num_splits_0, x = attention_23_slice_current_layer_value_cache)[name = string("attention_23_slice_value_cache_heads")]; + bool attention_23_scores_0_transpose_y_0 = const()[name = string("attention_23_scores_0_transpose_y_0"), val = bool(true)]; + bool attention_23_scores_0_transpose_x_0 = const()[name = string("attention_23_scores_0_transpose_x_0"), val = bool(false)]; + tensor attention_23_scores_0 = matmul(transpose_x = attention_23_scores_0_transpose_x_0, transpose_y = attention_23_scores_0_transpose_y_0, x = attention_23_key_cache_head_0, y = attention_23_q_splits_0)[name = string("attention_23_scores_0")]; + fp16 attention_23_scaled_scores_0_y_0 = const()[name = string("attention_23_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; + tensor attention_23_scaled_scores_0 = mul(x = attention_23_scores_0, y = attention_23_scaled_scores_0_y_0)[name = string("attention_23_scaled_scores_0")]; + tensor attention_23_masked_scaled_scores_0 = add(x = attention_23_scaled_scores_0, y = transpose_0)[name = string("attention_23_masked_scaled_scores_0")]; + int32 softmax_46_axis_0 = const()[name = string("softmax_46_axis_0"), val = int32(-2)]; + tensor softmax_46 = softmax(axis = softmax_46_axis_0, x = attention_23_masked_scaled_scores_0)[name = string("softmax_46")]; + bool attention_23_attention_0_transpose_x_0 = const()[name = string("attention_23_attention_0_transpose_x_0"), val = bool(true)]; + bool attention_23_attention_0_transpose_y_0 = const()[name = string("attention_23_attention_0_transpose_y_0"), val = bool(false)]; + tensor attention_23_attention_0 = matmul(transpose_x = attention_23_attention_0_transpose_x_0, transpose_y = attention_23_attention_0_transpose_y_0, x = softmax_46, y = attention_23_slice_value_cache_heads_0)[name = string("attention_23_attention_0")]; + bool attention_23_scores_1_transpose_y_0 = const()[name = string("attention_23_scores_1_transpose_y_0"), val = bool(true)]; + bool attention_23_scores_1_transpose_x_0 = const()[name = string("attention_23_scores_1_transpose_x_0"), val = bool(false)]; + tensor attention_23_scores_1 = matmul(transpose_x = attention_23_scores_1_transpose_x_0, transpose_y = attention_23_scores_1_transpose_y_0, x = attention_23_key_cache_head_1, y = attention_23_q_splits_1)[name = string("attention_23_scores_1")]; + fp16 attention_23_scaled_scores_1_y_0 = const()[name = string("attention_23_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; + tensor attention_23_scaled_scores_1 = mul(x = attention_23_scores_1, y = attention_23_scaled_scores_1_y_0)[name = string("attention_23_scaled_scores_1")]; + tensor attention_23_masked_scaled_scores_1 = add(x = attention_23_scaled_scores_1, y = transpose_0)[name = string("attention_23_masked_scaled_scores_1")]; + int32 softmax_47_axis_0 = const()[name = string("softmax_47_axis_0"), val = int32(-2)]; + tensor softmax_47 = softmax(axis = softmax_47_axis_0, x = attention_23_masked_scaled_scores_1)[name = string("softmax_47")]; + bool attention_23_attention_1_transpose_x_0 = const()[name = string("attention_23_attention_1_transpose_x_0"), val = bool(true)]; + bool attention_23_attention_1_transpose_y_0 = const()[name = string("attention_23_attention_1_transpose_y_0"), val = bool(false)]; + tensor attention_23_attention_1 = matmul(transpose_x = attention_23_attention_1_transpose_x_0, transpose_y = attention_23_attention_1_transpose_y_0, x = softmax_47, y = attention_23_slice_value_cache_heads_1)[name = string("attention_23_attention_1")]; + int32 attention_23_concat_attention_all_heads_axis_0 = const()[name = string("attention_23_concat_attention_all_heads_axis_0"), val = int32(1)]; + bool attention_23_concat_attention_all_heads_interleave_0 = const()[name = string("attention_23_concat_attention_all_heads_interleave_0"), val = bool(false)]; + tensor attention_23_concat_attention_all_heads = concat(axis = attention_23_concat_attention_all_heads_axis_0, interleave = attention_23_concat_attention_all_heads_interleave_0, values = (attention_23_attention_0, attention_23_attention_1))[name = string("attention_23_concat_attention_all_heads")]; + tensor attention_23_channels_first_retransposed_perm_0 = const()[name = string("attention_23_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; + tensor attention_23_reshaped_shape_0 = const()[name = string("attention_23_reshaped_shape_0"), val = tensor([1, 896, 64])]; + tensor attention_23_channels_first_retransposed = transpose(perm = attention_23_channels_first_retransposed_perm_0, x = attention_23_concat_attention_all_heads)[name = string("transpose_1")]; + tensor attention_23_reshaped = reshape(shape = attention_23_reshaped_shape_0, x = attention_23_channels_first_retransposed)[name = string("attention_23_reshaped")]; + tensor attention_23_outproj_weight_0 = const()[name = string("attention_23_outproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560458240)))]; + tensor attention_23_outproj_strides_0 = const()[name = string("attention_23_outproj_strides_0"), val = tensor([1])]; + string attention_23_outproj_pad_type_0 = const()[name = string("attention_23_outproj_pad_type_0"), val = string("valid")]; + tensor attention_23_outproj_pad_0 = const()[name = string("attention_23_outproj_pad_0"), val = tensor([0, 0])]; + tensor attention_23_outproj_dilations_0 = const()[name = string("attention_23_outproj_dilations_0"), val = tensor([1])]; + int32 attention_23_outproj_groups_0 = const()[name = string("attention_23_outproj_groups_0"), val = int32(1)]; + tensor attention_23_outproj = conv(dilations = attention_23_outproj_dilations_0, groups = attention_23_outproj_groups_0, pad = attention_23_outproj_pad_0, pad_type = attention_23_outproj_pad_type_0, strides = attention_23_outproj_strides_0, weight = attention_23_outproj_weight_0, x = attention_23_reshaped)[name = string("attention_23_outproj")]; + tensor block_23_residual_1 = add(x = block_22_residual_2, y = attention_23_outproj)[name = string("block_23_residual_1")]; + tensor block_23_ffn_rmsnorm_abs = abs(x = block_23_residual_1)[name = string("block_23_ffn_rmsnorm_abs")]; + tensor block_23_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_23_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool block_23_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_23_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor block_23_ffn_rmsnorm_maxval = reduce_max(axes = block_23_ffn_rmsnorm_maxval_axes_0, keep_dims = block_23_ffn_rmsnorm_maxval_keep_dims_0, x = block_23_ffn_rmsnorm_abs)[name = string("block_23_ffn_rmsnorm_maxval")]; + fp16 block_23_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_23_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 block_23_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_23_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor block_23_ffn_rmsnorm_maxval_clipped = clip(alpha = block_23_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_23_ffn_rmsnorm_maxval_clipped_beta_0, x = block_23_ffn_rmsnorm_maxval)[name = string("block_23_ffn_rmsnorm_maxval_clipped")]; + tensor block_23_ffn_rmsnorm_scaled = real_div(x = block_23_residual_1, y = block_23_ffn_rmsnorm_maxval_clipped)[name = string("block_23_ffn_rmsnorm_scaled")]; + tensor block_23_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_23_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool block_23_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_23_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor block_23_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_23_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_23_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_23_ffn_rmsnorm_scaled)[name = string("block_23_ffn_rmsnorm_squared_sum")]; + fp16 block_23_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_23_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor block_23_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_23_ffn_rmsnorm_rsqrt_epsilon_0, x = block_23_ffn_rmsnorm_squared_sum)[name = string("block_23_ffn_rmsnorm_rsqrt")]; + fp16 block_23_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_23_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor block_23_ffn_rmsnorm_dim_scaled = mul(x = block_23_ffn_rmsnorm_scaled, y = block_23_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_23_ffn_rmsnorm_dim_scaled")]; + tensor block_23_ffn_rmsnorm_normalized = mul(x = block_23_ffn_rmsnorm_dim_scaled, y = block_23_ffn_rmsnorm_rsqrt)[name = string("block_23_ffn_rmsnorm_normalized")]; + tensor block_23_ffn_rmsnorm_y_0 = const()[name = string("block_23_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562063936)))]; + tensor block_23_ffn_rmsnorm = mul(x = block_23_ffn_rmsnorm_normalized, y = block_23_ffn_rmsnorm_y_0)[name = string("block_23_ffn_rmsnorm")]; + tensor block_23_ffn_inproj_weight_0 = const()[name = string("block_23_ffn_inproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562065792)))]; + tensor block_23_ffn_inproj_strides_0 = const()[name = string("block_23_ffn_inproj_strides_0"), val = tensor([1])]; + string block_23_ffn_inproj_pad_type_0 = const()[name = string("block_23_ffn_inproj_pad_type_0"), val = string("valid")]; + tensor block_23_ffn_inproj_pad_0 = const()[name = string("block_23_ffn_inproj_pad_0"), val = tensor([0, 0])]; + tensor block_23_ffn_inproj_dilations_0 = const()[name = string("block_23_ffn_inproj_dilations_0"), val = tensor([1])]; + int32 block_23_ffn_inproj_groups_0 = const()[name = string("block_23_ffn_inproj_groups_0"), val = int32(1)]; + tensor block_23_ffn_inproj = conv(dilations = block_23_ffn_inproj_dilations_0, groups = block_23_ffn_inproj_groups_0, pad = block_23_ffn_inproj_pad_0, pad_type = block_23_ffn_inproj_pad_type_0, strides = block_23_ffn_inproj_strides_0, weight = block_23_ffn_inproj_weight_0, x = block_23_ffn_rmsnorm)[name = string("block_23_ffn_inproj")]; + tensor block_23_ffn_g_weight_0 = const()[name = string("block_23_ffn_g_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(570782144)))]; + tensor block_23_ffn_g_strides_0 = const()[name = string("block_23_ffn_g_strides_0"), val = tensor([1])]; + string block_23_ffn_g_pad_type_0 = const()[name = string("block_23_ffn_g_pad_type_0"), val = string("valid")]; + tensor block_23_ffn_g_pad_0 = const()[name = string("block_23_ffn_g_pad_0"), val = tensor([0, 0])]; + tensor block_23_ffn_g_dilations_0 = const()[name = string("block_23_ffn_g_dilations_0"), val = tensor([1])]; + int32 block_23_ffn_g_groups_0 = const()[name = string("block_23_ffn_g_groups_0"), val = int32(1)]; + tensor block_23_ffn_g = conv(dilations = block_23_ffn_g_dilations_0, groups = block_23_ffn_g_groups_0, pad = block_23_ffn_g_pad_0, pad_type = block_23_ffn_g_pad_type_0, strides = block_23_ffn_g_strides_0, weight = block_23_ffn_g_weight_0, x = block_23_ffn_rmsnorm)[name = string("block_23_ffn_g")]; + tensor block_23_ffn_g_activation = silu(x = block_23_ffn_g)[name = string("block_23_ffn_g_activation")]; + tensor block_23_ffn_x_gated = mul(x = block_23_ffn_inproj, y = block_23_ffn_g_activation)[name = string("block_23_ffn_x_gated")]; + tensor block_23_ffn_outproj_weight_0 = const()[name = string("block_23_ffn_outproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(579498496)))]; + tensor block_23_ffn_outproj_strides_0 = const()[name = string("block_23_ffn_outproj_strides_0"), val = tensor([1])]; + string block_23_ffn_outproj_pad_type_0 = const()[name = string("block_23_ffn_outproj_pad_type_0"), val = string("valid")]; + tensor block_23_ffn_outproj_pad_0 = const()[name = string("block_23_ffn_outproj_pad_0"), val = tensor([0, 0])]; + tensor block_23_ffn_outproj_dilations_0 = const()[name = string("block_23_ffn_outproj_dilations_0"), val = tensor([1])]; + int32 block_23_ffn_outproj_groups_0 = const()[name = string("block_23_ffn_outproj_groups_0"), val = int32(1)]; + tensor block_23_ffn_outproj = conv(dilations = block_23_ffn_outproj_dilations_0, groups = block_23_ffn_outproj_groups_0, pad = block_23_ffn_outproj_pad_0, pad_type = block_23_ffn_outproj_pad_type_0, strides = block_23_ffn_outproj_strides_0, weight = block_23_ffn_outproj_weight_0, x = block_23_ffn_x_gated)[name = string("block_23_ffn_outproj")]; + tensor block_23_residual_2 = add(x = block_23_ffn_outproj, y = block_23_residual_1)[name = string("block_23_residual_2")]; + tensor final_norm_rmsnorm_abs = abs(x = block_23_residual_2)[name = string("final_norm_rmsnorm_abs")]; + tensor final_norm_rmsnorm_maxval_axes_0 = const()[name = string("final_norm_rmsnorm_maxval_axes_0"), val = tensor([1])]; + bool final_norm_rmsnorm_maxval_keep_dims_0 = const()[name = string("final_norm_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; + tensor final_norm_rmsnorm_maxval = reduce_max(axes = final_norm_rmsnorm_maxval_axes_0, keep_dims = final_norm_rmsnorm_maxval_keep_dims_0, x = final_norm_rmsnorm_abs)[name = string("final_norm_rmsnorm_maxval")]; + fp16 final_norm_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("final_norm_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; + fp16 final_norm_rmsnorm_maxval_clipped_beta_0 = const()[name = string("final_norm_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; + tensor final_norm_rmsnorm_maxval_clipped = clip(alpha = final_norm_rmsnorm_maxval_clipped_alpha_0, beta = final_norm_rmsnorm_maxval_clipped_beta_0, x = final_norm_rmsnorm_maxval)[name = string("final_norm_rmsnorm_maxval_clipped")]; + tensor final_norm_rmsnorm_scaled = real_div(x = block_23_residual_2, y = final_norm_rmsnorm_maxval_clipped)[name = string("final_norm_rmsnorm_scaled")]; + tensor final_norm_rmsnorm_squared_sum_axes_0 = const()[name = string("final_norm_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; + bool final_norm_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("final_norm_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; + tensor final_norm_rmsnorm_squared_sum = reduce_sum_square(axes = final_norm_rmsnorm_squared_sum_axes_0, keep_dims = final_norm_rmsnorm_squared_sum_keep_dims_0, x = final_norm_rmsnorm_scaled)[name = string("final_norm_rmsnorm_squared_sum")]; + fp16 final_norm_rmsnorm_rsqrt_epsilon_0 = const()[name = string("final_norm_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; + tensor final_norm_rmsnorm_rsqrt = rsqrt(epsilon = final_norm_rmsnorm_rsqrt_epsilon_0, x = final_norm_rmsnorm_squared_sum)[name = string("final_norm_rmsnorm_rsqrt")]; + fp16 final_norm_rmsnorm_dim_scaled_y_0 = const()[name = string("final_norm_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; + tensor final_norm_rmsnorm_dim_scaled = mul(x = final_norm_rmsnorm_scaled, y = final_norm_rmsnorm_dim_scaled_y_0)[name = string("final_norm_rmsnorm_dim_scaled")]; + tensor final_norm_rmsnorm_normalized = mul(x = final_norm_rmsnorm_dim_scaled, y = final_norm_rmsnorm_rsqrt)[name = string("final_norm_rmsnorm_normalized")]; + tensor final_norm_rmsnorm_y_0 = const()[name = string("final_norm_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588214848)))]; + tensor final_norm_rmsnorm = mul(x = final_norm_rmsnorm_normalized, y = final_norm_rmsnorm_y_0)[name = string("final_norm_rmsnorm")]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588216704)))]; + tensor logits_0_strides_0 = const()[name = string("logits_0_strides_0"), val = tensor([1])]; + string logits_0_pad_type_0 = const()[name = string("logits_0_pad_type_0"), val = string("valid")]; + tensor logits_0_pad_0 = const()[name = string("logits_0_pad_0"), val = tensor([0, 0])]; + tensor logits_0_dilations_0 = const()[name = string("logits_0_dilations_0"), val = tensor([1])]; + int32 logits_0_groups_0 = const()[name = string("logits_0_groups_0"), val = int32(1)]; + tensor logits_0 = conv(dilations = logits_0_dilations_0, groups = logits_0_groups_0, pad = logits_0_pad_0, pad_type = logits_0_pad_type_0, strides = logits_0_strides_0, weight = expand_dims_1, x = final_norm_rmsnorm)[name = string("logits_0")]; + tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617576896)))]; + tensor logits_1_strides_0 = const()[name = string("logits_1_strides_0"), val = tensor([1])]; + string logits_1_pad_type_0 = const()[name = string("logits_1_pad_type_0"), val = string("valid")]; + tensor logits_1_pad_0 = const()[name = string("logits_1_pad_0"), val = tensor([0, 0])]; + tensor logits_1_dilations_0 = const()[name = string("logits_1_dilations_0"), val = tensor([1])]; + int32 logits_1_groups_0 = const()[name = string("logits_1_groups_0"), val = int32(1)]; + tensor logits_1 = conv(dilations = logits_1_dilations_0, groups = logits_1_groups_0, pad = logits_1_pad_0, pad_type = logits_1_pad_type_0, strides = logits_1_strides_0, weight = expand_dims_2, x = final_norm_rmsnorm)[name = string("logits_1")]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646937088)))]; + tensor logits_2_strides_0 = const()[name = string("logits_2_strides_0"), val = tensor([1])]; + string logits_2_pad_type_0 = const()[name = string("logits_2_pad_type_0"), val = string("valid")]; + tensor logits_2_pad_0 = const()[name = string("logits_2_pad_0"), val = tensor([0, 0])]; + tensor logits_2_dilations_0 = const()[name = string("logits_2_dilations_0"), val = tensor([1])]; + int32 logits_2_groups_0 = const()[name = string("logits_2_groups_0"), val = int32(1)]; + tensor logits_2 = conv(dilations = logits_2_dilations_0, groups = logits_2_groups_0, pad = logits_2_pad_0, pad_type = logits_2_pad_type_0, strides = logits_2_strides_0, weight = expand_dims_3, x = final_norm_rmsnorm)[name = string("logits_2")]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676297280)))]; + tensor logits_3_strides_0 = const()[name = string("logits_3_strides_0"), val = tensor([1])]; + string logits_3_pad_type_0 = const()[name = string("logits_3_pad_type_0"), val = string("valid")]; + tensor logits_3_pad_0 = const()[name = string("logits_3_pad_0"), val = tensor([0, 0])]; + tensor logits_3_dilations_0 = const()[name = string("logits_3_dilations_0"), val = tensor([1])]; + int32 logits_3_groups_0 = const()[name = string("logits_3_groups_0"), val = int32(1)]; + tensor logits_3 = conv(dilations = logits_3_dilations_0, groups = logits_3_groups_0, pad = logits_3_pad_0, pad_type = logits_3_pad_type_0, strides = logits_3_strides_0, weight = expand_dims_4, x = final_norm_rmsnorm)[name = string("logits_3")]; + tensor expand_dims_5 = const()[name = string("expand_dims_5"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(705657472)))]; + tensor logits_4_strides_0 = const()[name = string("logits_4_strides_0"), val = tensor([1])]; + string logits_4_pad_type_0 = const()[name = string("logits_4_pad_type_0"), val = string("valid")]; + tensor logits_4_pad_0 = const()[name = string("logits_4_pad_0"), val = tensor([0, 0])]; + tensor logits_4_dilations_0 = const()[name = string("logits_4_dilations_0"), val = tensor([1])]; + int32 logits_4_groups_0 = const()[name = string("logits_4_groups_0"), val = int32(1)]; + tensor logits_4 = conv(dilations = logits_4_dilations_0, groups = logits_4_groups_0, pad = logits_4_pad_0, pad_type = logits_4_pad_type_0, strides = logits_4_strides_0, weight = expand_dims_5, x = final_norm_rmsnorm)[name = string("logits_4")]; + tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735017664)))]; + tensor logits_5_strides_0 = const()[name = string("logits_5_strides_0"), val = tensor([1])]; + string logits_5_pad_type_0 = const()[name = string("logits_5_pad_type_0"), val = string("valid")]; + tensor logits_5_pad_0 = const()[name = string("logits_5_pad_0"), val = tensor([0, 0])]; + tensor logits_5_dilations_0 = const()[name = string("logits_5_dilations_0"), val = tensor([1])]; + int32 logits_5_groups_0 = const()[name = string("logits_5_groups_0"), val = int32(1)]; + tensor logits_5 = conv(dilations = logits_5_dilations_0, groups = logits_5_groups_0, pad = logits_5_pad_0, pad_type = logits_5_pad_type_0, strides = logits_5_strides_0, weight = expand_dims_6, x = final_norm_rmsnorm)[name = string("logits_5")]; + tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764377856)))]; + tensor logits_6_strides_0 = const()[name = string("logits_6_strides_0"), val = tensor([1])]; + string logits_6_pad_type_0 = const()[name = string("logits_6_pad_type_0"), val = string("valid")]; + tensor logits_6_pad_0 = const()[name = string("logits_6_pad_0"), val = tensor([0, 0])]; + tensor logits_6_dilations_0 = const()[name = string("logits_6_dilations_0"), val = tensor([1])]; + int32 logits_6_groups_0 = const()[name = string("logits_6_groups_0"), val = int32(1)]; + tensor logits_6 = conv(dilations = logits_6_dilations_0, groups = logits_6_groups_0, pad = logits_6_pad_0, pad_type = logits_6_pad_type_0, strides = logits_6_strides_0, weight = expand_dims_7, x = final_norm_rmsnorm)[name = string("logits_6")]; + tensor expand_dims_8 = const()[name = string("expand_dims_8"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793738048)))]; + tensor logits_7_strides_0 = const()[name = string("logits_7_strides_0"), val = tensor([1])]; + string logits_7_pad_type_0 = const()[name = string("logits_7_pad_type_0"), val = string("valid")]; + tensor logits_7_pad_0 = const()[name = string("logits_7_pad_0"), val = tensor([0, 0])]; + tensor logits_7_dilations_0 = const()[name = string("logits_7_dilations_0"), val = tensor([1])]; + int32 logits_7_groups_0 = const()[name = string("logits_7_groups_0"), val = int32(1)]; + tensor logits_7 = conv(dilations = logits_7_dilations_0, groups = logits_7_groups_0, pad = logits_7_pad_0, pad_type = logits_7_pad_type_0, strides = logits_7_strides_0, weight = expand_dims_8, x = final_norm_rmsnorm)[name = string("logits_7")]; + tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(823098240)))]; + tensor logits_8_strides_0 = const()[name = string("logits_8_strides_0"), val = tensor([1])]; + string logits_8_pad_type_0 = const()[name = string("logits_8_pad_type_0"), val = string("valid")]; + tensor logits_8_pad_0 = const()[name = string("logits_8_pad_0"), val = tensor([0, 0])]; + tensor logits_8_dilations_0 = const()[name = string("logits_8_dilations_0"), val = tensor([1])]; + int32 logits_8_groups_0 = const()[name = string("logits_8_groups_0"), val = int32(1)]; + tensor logits_8 = conv(dilations = logits_8_dilations_0, groups = logits_8_groups_0, pad = logits_8_pad_0, pad_type = logits_8_pad_type_0, strides = logits_8_strides_0, weight = expand_dims_9, x = final_norm_rmsnorm)[name = string("logits_8")]; + tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852458432)))]; + tensor logits_9_strides_0 = const()[name = string("logits_9_strides_0"), val = tensor([1])]; + string logits_9_pad_type_0 = const()[name = string("logits_9_pad_type_0"), val = string("valid")]; + tensor logits_9_pad_0 = const()[name = string("logits_9_pad_0"), val = tensor([0, 0])]; + tensor logits_9_dilations_0 = const()[name = string("logits_9_dilations_0"), val = tensor([1])]; + int32 logits_9_groups_0 = const()[name = string("logits_9_groups_0"), val = int32(1)]; + tensor logits_9 = conv(dilations = logits_9_dilations_0, groups = logits_9_groups_0, pad = logits_9_pad_0, pad_type = logits_9_pad_type_0, strides = logits_9_strides_0, weight = expand_dims_10, x = final_norm_rmsnorm)[name = string("logits_9")]; + int32 _logits_axis_0 = const()[name = string("_logits_axis_0"), val = int32(1)]; + bool _logits_interleave_0 = const()[name = string("_logits_interleave_0"), val = bool(false)]; + tensor _logits = concat(axis = _logits_axis_0, interleave = _logits_interleave_0, values = (logits_0, logits_1, logits_2, logits_3, logits_4, logits_5, logits_6, logits_7, logits_8, logits_9))[name = string("_logits")]; + string logits_dtype_0 = const()[name = string("logits_dtype_0"), val = string("fp32")]; + tensor logits = cast(dtype = logits_dtype_0, x = _logits)[name = string("cast_0")]; + } -> (logits); +} \ No newline at end of file