program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.3.14.1"}})] { func length_1(tensor input_ids, state> key_cache_state, tensor query_pos1, state> value_cache_state) { tensor expand_dims_0_axes_0 = const()[name = string("expand_dims_0_axes_0"), val = tensor([-1, -2])]; tensor expand_dims_0 = expand_dims(axes = expand_dims_0_axes_0, x = query_pos1)[name = string("expand_dims_0")]; tensor mask_gather_x_0 = const()[name = string("mask_gather_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; int32 mask_gather_axis_0 = const()[name = string("mask_gather_axis_0"), val = int32(0)]; int32 mask_gather_batch_dims_0 = const()[name = string("mask_gather_batch_dims_0"), val = int32(0)]; bool mask_gather_validate_indices_0 = const()[name = string("mask_gather_validate_indices_0"), val = bool(false)]; tensor mask_gather = gather(axis = mask_gather_axis_0, batch_dims = mask_gather_batch_dims_0, indices = expand_dims_0, validate_indices = mask_gather_validate_indices_0, x = mask_gather_x_0)[name = string("mask_gather")]; tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([0, 1, 3, 2])]; tensor query_sin_emb_x_0 = const()[name = string("query_sin_emb_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524416)))]; int32 query_sin_emb_axis_0 = const()[name = string("query_sin_emb_axis_0"), val = int32(0)]; int32 query_sin_emb_batch_dims_0 = const()[name = string("query_sin_emb_batch_dims_0"), val = int32(0)]; bool query_sin_emb_validate_indices_0 = const()[name = string("query_sin_emb_validate_indices_0"), val = bool(false)]; tensor query_sin_emb = gather(axis = query_sin_emb_axis_0, batch_dims = query_sin_emb_batch_dims_0, indices = expand_dims_0, validate_indices = query_sin_emb_validate_indices_0, x = query_sin_emb_x_0)[name = string("query_sin_emb")]; tensor query_cos_emb_x_0 = const()[name = string("query_cos_emb_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(590016)))]; int32 query_cos_emb_axis_0 = const()[name = string("query_cos_emb_axis_0"), val = int32(0)]; int32 query_cos_emb_batch_dims_0 = const()[name = string("query_cos_emb_batch_dims_0"), val = int32(0)]; bool query_cos_emb_validate_indices_0 = const()[name = string("query_cos_emb_validate_indices_0"), val = bool(false)]; tensor query_cos_emb = gather(axis = query_cos_emb_axis_0, batch_dims = query_cos_emb_batch_dims_0, indices = expand_dims_0, validate_indices = query_cos_emb_validate_indices_0, x = query_cos_emb_x_0)[name = string("query_cos_emb")]; tensor token_embedding_x_0 = const()[name = string("token_embedding_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(655616)))]; int32 token_embedding_axis_0 = const()[name = string("token_embedding_axis_0"), val = int32(0)]; int32 token_embedding_batch_dims_0 = const()[name = string("token_embedding_batch_dims_0"), val = int32(0)]; bool token_embedding_validate_indices_0 = const()[name = string("token_embedding_validate_indices_0"), val = bool(false)]; tensor token_embedding = gather(axis = token_embedding_axis_0, batch_dims = token_embedding_batch_dims_0, indices = input_ids, validate_indices = token_embedding_validate_indices_0, x = token_embedding_x_0)[name = string("token_embedding")]; tensor input_embeddings_channels_first_perm_0 = const()[name = string("input_embeddings_channels_first_perm_0"), val = tensor([0, 2, 1])]; 
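For readers tracing the graph: the prologue above is pure table lookup. `mask_gather`, `query_sin_emb`, and `query_cos_emb` gather the causal-mask row and the rotary sin/cos rows for the current position (`query_pos1`, expanded to match the table rank), and `token_embedding` gathers the input row of the embedding matrix; all four tables are constants in `weight.bin`. Below is a rough NumPy sketch of these lookups. The table contents, layouts, and the vocabulary size are assumptions for illustration only (the `tensor<dtype, shape>` annotations are missing from this listing); `head_dim = 64`, the 512-entry context, and the 896-wide model dimension are visible in later ops.

```python
import numpy as np

# Illustrative sizes: 64/512/896 appear in later ops; the vocab size is not
# recoverable from this listing, so a placeholder is used.
HEAD_DIM, MAX_LEN, VOCAB, D_MODEL = 64, 512, 1000, 896

# Additive causal-mask rows (assumed layout of the blob behind mask_gather_x_0).
MASK_TABLE = np.triu(np.full((MAX_LEN, MAX_LEN), -np.inf, dtype=np.float16), k=1)

# RoPE sin/cos tables (base 10000 and the tiled half-layout are assumptions,
# not read from weight.bin).
_inv_freq = 1.0 / (10000.0 ** (np.arange(0, HEAD_DIM, 2) / HEAD_DIM))
_angles = np.outer(np.arange(MAX_LEN), _inv_freq)
SIN_TABLE = np.tile(np.sin(_angles), 2).astype(np.float16)   # [MAX_LEN, HEAD_DIM]
COS_TABLE = np.tile(np.cos(_angles), 2).astype(np.float16)
EMBED_TABLE = np.zeros((VOCAB, D_MODEL), dtype=np.float16)   # stand-in for the real weights

def length_1_prologue(input_ids, query_pos1):
    """Mirror of the gather ops at the top of func length_1 (single-token decode)."""
    pos = int(query_pos1[0])
    mask_row = MASK_TABLE[pos]           # mask_gather
    sin_emb = SIN_TABLE[pos]             # query_sin_emb
    cos_emb = COS_TABLE[pos]             # query_cos_emb
    tok_emb = EMBED_TABLE[input_ids]     # token_embedding, [1, D_MODEL]
    return mask_row, sin_emb, cos_emb, tok_emb
```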
int32 end_pos_0_x_0 = const()[name = string("end_pos_0_x_0"), val = int32(1)]; tensor end_pos_0 = add(x = end_pos_0_x_0, y = query_pos1)[name = string("end_pos_0")]; tensor read_state_0 = read_state(input = key_cache_state)[name = string("read_state_0")]; tensor read_state_1 = read_state(input = value_cache_state)[name = string("read_state_1")]; tensor input_embeddings_channels_first = transpose(perm = input_embeddings_channels_first_perm_0, x = token_embedding)[name = string("transpose_49")]; tensor block_0_attention_rmsnorm_abs = abs(x = input_embeddings_channels_first)[name = string("block_0_attention_rmsnorm_abs")]; tensor block_0_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_0_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_0_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_0_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_0_attention_rmsnorm_maxval = reduce_max(axes = block_0_attention_rmsnorm_maxval_axes_0, keep_dims = block_0_attention_rmsnorm_maxval_keep_dims_0, x = block_0_attention_rmsnorm_abs)[name = string("block_0_attention_rmsnorm_maxval")]; fp16 block_0_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_0_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_0_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_0_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_0_attention_rmsnorm_maxval_clipped = clip(alpha = block_0_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_0_attention_rmsnorm_maxval_clipped_beta_0, x = block_0_attention_rmsnorm_maxval)[name = string("block_0_attention_rmsnorm_maxval_clipped")]; tensor block_0_attention_rmsnorm_scaled = real_div(x = input_embeddings_channels_first, y = block_0_attention_rmsnorm_maxval_clipped)[name = string("block_0_attention_rmsnorm_scaled")]; tensor block_0_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_0_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_0_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_0_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_0_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_0_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_0_attention_rmsnorm_squared_sum_keep_dims_0, x = block_0_attention_rmsnorm_scaled)[name = string("block_0_attention_rmsnorm_squared_sum")]; fp16 block_0_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_0_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_0_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_0_attention_rmsnorm_rsqrt_epsilon_0, x = block_0_attention_rmsnorm_squared_sum)[name = string("block_0_attention_rmsnorm_rsqrt")]; fp16 block_0_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_0_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_0_attention_rmsnorm_dim_scaled = mul(x = block_0_attention_rmsnorm_scaled, y = block_0_attention_rmsnorm_dim_scaled_y_0)[name = string("block_0_attention_rmsnorm_dim_scaled")]; tensor block_0_attention_rmsnorm_normalized = mul(x = block_0_attention_rmsnorm_dim_scaled, y = block_0_attention_rmsnorm_rsqrt)[name = string("block_0_attention_rmsnorm_normalized")]; tensor block_0_attention_rmsnorm_y_0 = const()[name = string("block_0_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272924992)))]; tensor block_0_attention_rmsnorm 
= mul(x = block_0_attention_rmsnorm_normalized, y = block_0_attention_rmsnorm_y_0)[name = string("block_0_attention_rmsnorm")]; tensor attention_0_qkvproj_weight_0 = const()[name = string("attention_0_qkvproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272926848)))]; tensor attention_0_qkvproj_bias_0 = const()[name = string("attention_0_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274991296)))]; tensor attention_0_qkvproj_strides_0 = const()[name = string("attention_0_qkvproj_strides_0"), val = tensor([1])]; string attention_0_qkvproj_pad_type_0 = const()[name = string("attention_0_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_0_qkvproj_pad_0 = const()[name = string("attention_0_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_0_qkvproj_dilations_0 = const()[name = string("attention_0_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_0_qkvproj_groups_0 = const()[name = string("attention_0_qkvproj_groups_0"), val = int32(1)]; tensor attention_0_qkvproj = conv(bias = attention_0_qkvproj_bias_0, dilations = attention_0_qkvproj_dilations_0, groups = attention_0_qkvproj_groups_0, pad = attention_0_qkvproj_pad_0, pad_type = attention_0_qkvproj_pad_type_0, strides = attention_0_qkvproj_strides_0, weight = attention_0_qkvproj_weight_0, x = block_0_attention_rmsnorm)[name = string("attention_0_qkvproj")]; tensor attention_0_head_reshape_shape_0 = const()[name = string("attention_0_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_0_head_reshape = reshape(shape = attention_0_head_reshape_shape_0, x = attention_0_qkvproj)[name = string("attention_0_head_reshape")]; tensor attention_0_head_transpose_perm_0 = const()[name = string("attention_0_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_0_split_qkv_heads_axis_0 = const()[name = string("attention_0_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_0_split_qkv_heads_split_sizes_0 = const()[name = string("attention_0_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_0_head_transpose = transpose(perm = attention_0_head_transpose_perm_0, x = attention_0_head_reshape)[name = string("transpose_48")]; tensor attention_0_split_qkv_heads_0, tensor attention_0_split_qkv_heads_1, tensor attention_0_split_qkv_heads_2 = split(axis = attention_0_split_qkv_heads_axis_0, split_sizes = attention_0_split_qkv_heads_split_sizes_0, x = attention_0_head_transpose)[name = string("attention_0_split_qkv_heads")]; tensor attention_0_q_rope_lhs_mult = mul(x = attention_0_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_0_q_rope_lhs_mult")]; int32 attention_0_q_rotate_half_split_num_splits_0 = const()[name = string("attention_0_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_0_q_rotate_half_split_axis_0 = const()[name = string("attention_0_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_0_q_rotate_half_split_0, tensor attention_0_q_rotate_half_split_1 = split(axis = attention_0_q_rotate_half_split_axis_0, num_splits = attention_0_q_rotate_half_split_num_splits_0, x = attention_0_split_qkv_heads_0)[name = string("attention_0_q_rotate_half_split")]; fp16 attention_0_q_rotate_half_neg_y_0 = const()[name = string("attention_0_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_0_q_rotate_half_neg = mul(x = attention_0_q_rotate_half_split_1, y = attention_0_q_rotate_half_neg_y_0)[name = 
string("attention_0_q_rotate_half_neg")]; int32 attention_0_q_rotate_half_concat_axis_0 = const()[name = string("attention_0_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_0_q_rotate_half_concat_interleave_0 = const()[name = string("attention_0_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_0_q_rotate_half_concat = concat(axis = attention_0_q_rotate_half_concat_axis_0, interleave = attention_0_q_rotate_half_concat_interleave_0, values = (attention_0_q_rotate_half_neg, attention_0_q_rotate_half_split_0))[name = string("attention_0_q_rotate_half_concat")]; tensor attention_0_q_rope_rhs_mult = mul(x = attention_0_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_0_q_rope_rhs_mult")]; tensor attention_0_q_rope = add(x = attention_0_q_rope_lhs_mult, y = attention_0_q_rope_rhs_mult)[name = string("attention_0_q_rope")]; tensor attention_0_k_rope_lhs_mult = mul(x = attention_0_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_0_k_rope_lhs_mult")]; int32 attention_0_k_rotate_half_split_num_splits_0 = const()[name = string("attention_0_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_0_k_rotate_half_split_axis_0 = const()[name = string("attention_0_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_0_k_rotate_half_split_0, tensor attention_0_k_rotate_half_split_1 = split(axis = attention_0_k_rotate_half_split_axis_0, num_splits = attention_0_k_rotate_half_split_num_splits_0, x = attention_0_split_qkv_heads_1)[name = string("attention_0_k_rotate_half_split")]; fp16 attention_0_k_rotate_half_neg_y_0 = const()[name = string("attention_0_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_0_k_rotate_half_neg = mul(x = attention_0_k_rotate_half_split_1, y = attention_0_k_rotate_half_neg_y_0)[name = string("attention_0_k_rotate_half_neg")]; int32 attention_0_k_rotate_half_concat_axis_0 = const()[name = string("attention_0_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_0_k_rotate_half_concat_interleave_0 = const()[name = string("attention_0_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_0_k_rotate_half_concat = concat(axis = attention_0_k_rotate_half_concat_axis_0, interleave = attention_0_k_rotate_half_concat_interleave_0, values = (attention_0_k_rotate_half_neg, attention_0_k_rotate_half_split_0))[name = string("attention_0_k_rotate_half_concat")]; tensor attention_0_k_rope_rhs_mult = mul(x = attention_0_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_0_k_rope_rhs_mult")]; tensor attention_0_k_rope = add(x = attention_0_k_rope_lhs_mult, y = attention_0_k_rope_rhs_mult)[name = string("attention_0_k_rope")]; int32 attention_0_q_splits_axis_0 = const()[name = string("attention_0_q_splits_axis_0"), val = int32(1)]; int32 attention_0_q_splits_num_splits_0 = const()[name = string("attention_0_q_splits_num_splits_0"), val = int32(2)]; tensor attention_0_q_splits_0, tensor attention_0_q_splits_1 = split(axis = attention_0_q_splits_axis_0, num_splits = attention_0_q_splits_num_splits_0, x = attention_0_q_rope)[name = string("attention_0_q_splits")]; tensor attention_0_update_begin_0_values0_0 = const()[name = string("attention_0_update_begin_0_values0_0"), val = tensor([0])]; tensor attention_0_update_begin_0_values1_0 = const()[name = string("attention_0_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_0_update_begin_0_values3_0 = const()[name = string("attention_0_update_begin_0_values3_0"), val = tensor([0])]; int32 
attention_0_update_begin_0_axis_0 = const()[name = string("attention_0_update_begin_0_axis_0"), val = int32(0)]; bool attention_0_update_begin_0_interleave_0 = const()[name = string("attention_0_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_0_update_begin_0 = concat(axis = attention_0_update_begin_0_axis_0, interleave = attention_0_update_begin_0_interleave_0, values = (attention_0_update_begin_0_values0_0, attention_0_update_begin_0_values1_0, query_pos1, attention_0_update_begin_0_values3_0))[name = string("attention_0_update_begin_0")]; tensor attention_0_update_end_0_values0_0 = const()[name = string("attention_0_update_end_0_values0_0"), val = tensor([1])]; tensor attention_0_update_end_0_values1_0 = const()[name = string("attention_0_update_end_0_values1_0"), val = tensor([2])]; tensor attention_0_update_end_0_values3_0 = const()[name = string("attention_0_update_end_0_values3_0"), val = tensor([64])]; int32 attention_0_update_end_0_axis_0 = const()[name = string("attention_0_update_end_0_axis_0"), val = int32(0)]; bool attention_0_update_end_0_interleave_0 = const()[name = string("attention_0_update_end_0_interleave_0"), val = bool(false)]; tensor attention_0_update_end_0 = concat(axis = attention_0_update_end_0_axis_0, interleave = attention_0_update_end_0_interleave_0, values = (attention_0_update_end_0_values0_0, attention_0_update_end_0_values1_0, end_pos_0, attention_0_update_end_0_values3_0))[name = string("attention_0_update_end_0")]; tensor attention_0_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_0_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_0_updated_key_cache_0 = slice_update(begin = attention_0_update_begin_0, end = attention_0_update_end_0, squeeze_mask = attention_0_updated_key_cache_0_squeeze_mask_0, update = attention_0_k_rope, x = read_state_0)[name = string("attention_0_updated_key_cache_0")]; write_state(data = attention_0_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_0_write_state")]; tensor coreml_update_state_0 = read_state(input = key_cache_state)[name = string("coreml_update_state_0")]; tensor attention_0_key_cache_begin_0 = const()[name = string("attention_0_key_cache_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_0_key_cache_end_0 = const()[name = string("attention_0_key_cache_end_0"), val = tensor([1, 2, 512, 64])]; tensor attention_0_key_cache_squeeze_mask_0 = const()[name = string("attention_0_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_0_key_cache = slice_by_index(begin = attention_0_key_cache_begin_0, end = attention_0_key_cache_end_0, squeeze_mask = attention_0_key_cache_squeeze_mask_0, x = coreml_update_state_0)[name = string("attention_0_key_cache")]; int32 attention_0_key_cache_head_axis_0 = const()[name = string("attention_0_key_cache_head_axis_0"), val = int32(1)]; int32 attention_0_key_cache_head_num_splits_0 = const()[name = string("attention_0_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_0_key_cache_head_0, tensor attention_0_key_cache_head_1 = split(axis = attention_0_key_cache_head_axis_0, num_splits = attention_0_key_cache_head_num_splits_0, x = attention_0_key_cache)[name = string("attention_0_key_cache_head")]; tensor attention_0_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_0_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_0_updated_value_cache_0 = 
slice_update(begin = attention_0_update_begin_0, end = attention_0_update_end_0, squeeze_mask = attention_0_updated_value_cache_0_squeeze_mask_0, update = attention_0_split_qkv_heads_2, x = read_state_1)[name = string("attention_0_updated_value_cache_0")]; write_state(data = attention_0_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_1_write_state")]; tensor coreml_update_state_1 = read_state(input = value_cache_state)[name = string("coreml_update_state_1")]; tensor attention_0_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_0_slice_current_layer_value_cache_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_0_slice_current_layer_value_cache_end_0 = const()[name = string("attention_0_slice_current_layer_value_cache_end_0"), val = tensor([1, 2, 512, 64])]; tensor attention_0_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_0_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_0_slice_current_layer_value_cache = slice_by_index(begin = attention_0_slice_current_layer_value_cache_begin_0, end = attention_0_slice_current_layer_value_cache_end_0, squeeze_mask = attention_0_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_1)[name = string("attention_0_slice_current_layer_value_cache")]; int32 attention_0_slice_value_cache_heads_axis_0 = const()[name = string("attention_0_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_0_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_0_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_0_slice_value_cache_heads_0, tensor attention_0_slice_value_cache_heads_1 = split(axis = attention_0_slice_value_cache_heads_axis_0, num_splits = attention_0_slice_value_cache_heads_num_splits_0, x = attention_0_slice_current_layer_value_cache)[name = string("attention_0_slice_value_cache_heads")]; bool attention_0_scores_0_transpose_y_0 = const()[name = string("attention_0_scores_0_transpose_y_0"), val = bool(true)]; bool attention_0_scores_0_transpose_x_0 = const()[name = string("attention_0_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_0_scores_0 = matmul(transpose_x = attention_0_scores_0_transpose_x_0, transpose_y = attention_0_scores_0_transpose_y_0, x = attention_0_key_cache_head_0, y = attention_0_q_splits_0)[name = string("attention_0_scores_0")]; fp16 attention_0_scaled_scores_0_y_0 = const()[name = string("attention_0_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_0_scaled_scores_0 = mul(x = attention_0_scores_0, y = attention_0_scaled_scores_0_y_0)[name = string("attention_0_scaled_scores_0")]; tensor transpose_0 = transpose(perm = transpose_0_perm_0, x = mask_gather)[name = string("transpose_50")]; tensor attention_0_masked_scaled_scores_0 = add(x = attention_0_scaled_scores_0, y = transpose_0)[name = string("attention_0_masked_scaled_scores_0")]; int32 softmax_0_axis_0 = const()[name = string("softmax_0_axis_0"), val = int32(-2)]; tensor softmax_0 = softmax(axis = softmax_0_axis_0, x = attention_0_masked_scaled_scores_0)[name = string("softmax_0")]; bool attention_0_attention_0_transpose_x_0 = const()[name = string("attention_0_attention_0_transpose_x_0"), val = bool(true)]; bool attention_0_attention_0_transpose_y_0 = const()[name = string("attention_0_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_0_attention_0 = matmul(transpose_x = 
attention_0_attention_0_transpose_x_0, transpose_y = attention_0_attention_0_transpose_y_0, x = softmax_0, y = attention_0_slice_value_cache_heads_0)[name = string("attention_0_attention_0")]; bool attention_0_scores_1_transpose_y_0 = const()[name = string("attention_0_scores_1_transpose_y_0"), val = bool(true)]; bool attention_0_scores_1_transpose_x_0 = const()[name = string("attention_0_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_0_scores_1 = matmul(transpose_x = attention_0_scores_1_transpose_x_0, transpose_y = attention_0_scores_1_transpose_y_0, x = attention_0_key_cache_head_1, y = attention_0_q_splits_1)[name = string("attention_0_scores_1")]; fp16 attention_0_scaled_scores_1_y_0 = const()[name = string("attention_0_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_0_scaled_scores_1 = mul(x = attention_0_scores_1, y = attention_0_scaled_scores_1_y_0)[name = string("attention_0_scaled_scores_1")]; tensor attention_0_masked_scaled_scores_1 = add(x = attention_0_scaled_scores_1, y = transpose_0)[name = string("attention_0_masked_scaled_scores_1")]; int32 softmax_1_axis_0 = const()[name = string("softmax_1_axis_0"), val = int32(-2)]; tensor softmax_1 = softmax(axis = softmax_1_axis_0, x = attention_0_masked_scaled_scores_1)[name = string("softmax_1")]; bool attention_0_attention_1_transpose_x_0 = const()[name = string("attention_0_attention_1_transpose_x_0"), val = bool(true)]; bool attention_0_attention_1_transpose_y_0 = const()[name = string("attention_0_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_0_attention_1 = matmul(transpose_x = attention_0_attention_1_transpose_x_0, transpose_y = attention_0_attention_1_transpose_y_0, x = softmax_1, y = attention_0_slice_value_cache_heads_1)[name = string("attention_0_attention_1")]; int32 attention_0_concat_attention_all_heads_axis_0 = const()[name = string("attention_0_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_0_concat_attention_all_heads_interleave_0 = const()[name = string("attention_0_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_0_concat_attention_all_heads = concat(axis = attention_0_concat_attention_all_heads_axis_0, interleave = attention_0_concat_attention_all_heads_interleave_0, values = (attention_0_attention_0, attention_0_attention_1))[name = string("attention_0_concat_attention_all_heads")]; tensor attention_0_channels_first_retransposed_perm_0 = const()[name = string("attention_0_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_0_reshaped_shape_0 = const()[name = string("attention_0_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_0_channels_first_retransposed = transpose(perm = attention_0_channels_first_retransposed_perm_0, x = attention_0_concat_attention_all_heads)[name = string("transpose_47")]; tensor attention_0_reshaped = reshape(shape = attention_0_reshaped_shape_0, x = attention_0_channels_first_retransposed)[name = string("attention_0_reshaped")]; tensor attention_0_outproj_weight_0 = const()[name = string("attention_0_outproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274993664)))]; tensor attention_0_outproj_strides_0 = const()[name = string("attention_0_outproj_strides_0"), val = tensor([1])]; string attention_0_outproj_pad_type_0 = const()[name = string("attention_0_outproj_pad_type_0"), val = string("valid")]; tensor attention_0_outproj_pad_0 = const()[name = string("attention_0_outproj_pad_0"), val 
= tensor([0, 0])]; tensor attention_0_outproj_dilations_0 = const()[name = string("attention_0_outproj_dilations_0"), val = tensor([1])]; int32 attention_0_outproj_groups_0 = const()[name = string("attention_0_outproj_groups_0"), val = int32(1)]; tensor attention_0_outproj = conv(dilations = attention_0_outproj_dilations_0, groups = attention_0_outproj_groups_0, pad = attention_0_outproj_pad_0, pad_type = attention_0_outproj_pad_type_0, strides = attention_0_outproj_strides_0, weight = attention_0_outproj_weight_0, x = attention_0_reshaped)[name = string("attention_0_outproj")]; tensor block_0_residual_1 = add(x = input_embeddings_channels_first, y = attention_0_outproj)[name = string("block_0_residual_1")]; tensor block_0_ffn_rmsnorm_abs = abs(x = block_0_residual_1)[name = string("block_0_ffn_rmsnorm_abs")]; tensor block_0_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_0_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_0_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_0_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_0_ffn_rmsnorm_maxval = reduce_max(axes = block_0_ffn_rmsnorm_maxval_axes_0, keep_dims = block_0_ffn_rmsnorm_maxval_keep_dims_0, x = block_0_ffn_rmsnorm_abs)[name = string("block_0_ffn_rmsnorm_maxval")]; fp16 block_0_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_0_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_0_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_0_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_0_ffn_rmsnorm_maxval_clipped = clip(alpha = block_0_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_0_ffn_rmsnorm_maxval_clipped_beta_0, x = block_0_ffn_rmsnorm_maxval)[name = string("block_0_ffn_rmsnorm_maxval_clipped")]; tensor block_0_ffn_rmsnorm_scaled = real_div(x = block_0_residual_1, y = block_0_ffn_rmsnorm_maxval_clipped)[name = string("block_0_ffn_rmsnorm_scaled")]; tensor block_0_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_0_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_0_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_0_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_0_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_0_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_0_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_0_ffn_rmsnorm_scaled)[name = string("block_0_ffn_rmsnorm_squared_sum")]; fp16 block_0_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_0_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_0_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_0_ffn_rmsnorm_rsqrt_epsilon_0, x = block_0_ffn_rmsnorm_squared_sum)[name = string("block_0_ffn_rmsnorm_rsqrt")]; fp16 block_0_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_0_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_0_ffn_rmsnorm_dim_scaled = mul(x = block_0_ffn_rmsnorm_scaled, y = block_0_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_0_ffn_rmsnorm_dim_scaled")]; tensor block_0_ffn_rmsnorm_normalized = mul(x = block_0_ffn_rmsnorm_dim_scaled, y = block_0_ffn_rmsnorm_rsqrt)[name = string("block_0_ffn_rmsnorm_normalized")]; tensor block_0_ffn_rmsnorm_y_0 = const()[name = string("block_0_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276599360)))]; tensor block_0_ffn_rmsnorm = mul(x = block_0_ffn_rmsnorm_normalized, y = block_0_ffn_rmsnorm_y_0)[name = string("block_0_ffn_rmsnorm")]; 
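Two helper patterns recur in every block above and throughout the rest of the program: an overflow-safe fp16 RMSNorm (abs, per-token max, clip, divide, sum of squares, rsqrt, rescale by the fp16 constant 0x1.dfp+4 ≈ 29.94 ≈ sqrt(896), then a per-channel gain) and the rotate-half form of RoPE applied to the query and key heads. A minimal NumPy sketch of both follows, written to match the op sequence in this listing rather than any particular reference implementation.

```python
import numpy as np

EPS = np.float16(2.0 ** -14)                        # 0x1p-14: the clip alpha and rsqrt epsilon above
DIM_SCALE = np.float16(float.fromhex("0x1.dfp+4"))  # 29.9375 ~= sqrt(896), the model width

def rmsnorm_fp16(x, gain):
    """Overflow-safe RMSNorm as in block_0_attention_rmsnorm / block_0_ffn_rmsnorm.

    x: [d_model, seq] channels-first activations, gain: [d_model, 1].
    Dividing by the per-token max |x| first keeps the sum of squares inside
    fp16 range before the rsqrt; the result is algebraically plain RMSNorm.
    """
    maxval = np.maximum(np.abs(x).max(axis=0, keepdims=True), EPS)               # reduce_max + clip
    scaled = x / maxval                                                          # real_div
    inv_rms = 1.0 / np.sqrt((scaled * scaled).sum(axis=0, keepdims=True) + EPS)  # rsqrt with epsilon
    return scaled * DIM_SCALE * inv_rms * gain

def rope_rotate_half(x, sin_emb, cos_emb):
    """RoPE as in attention_0_q_rope / attention_0_k_rope: split the last axis,
    negate the upper half, swap the halves, then combine with the cos/sin rows."""
    half = x.shape[-1] // 2
    lo, hi = x[..., :half], x[..., half:]
    rotated = np.concatenate([-hi, lo], axis=-1)
    return x * cos_emb + rotated * sin_emb
```

Around these helpers, each layer writes its keys and values into the shared cache with `slice_update` at `[layer, :, query_pos1:end_pos_0, :]`, which reads as grouped-query attention: the `[14, 2, 2]` split yields 14 query heads in two groups of 7, each group attending against one of the 2 cached KV heads before the outputs are concatenated and projected back to 896 channels.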
tensor block_0_ffn_inproj_weight_0 = const()[name = string("block_0_ffn_inproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276601216)))]; tensor block_0_ffn_inproj_strides_0 = const()[name = string("block_0_ffn_inproj_strides_0"), val = tensor([1])]; string block_0_ffn_inproj_pad_type_0 = const()[name = string("block_0_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_0_ffn_inproj_pad_0 = const()[name = string("block_0_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_0_ffn_inproj_dilations_0 = const()[name = string("block_0_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_0_ffn_inproj_groups_0 = const()[name = string("block_0_ffn_inproj_groups_0"), val = int32(1)]; tensor block_0_ffn_inproj = conv(dilations = block_0_ffn_inproj_dilations_0, groups = block_0_ffn_inproj_groups_0, pad = block_0_ffn_inproj_pad_0, pad_type = block_0_ffn_inproj_pad_type_0, strides = block_0_ffn_inproj_strides_0, weight = block_0_ffn_inproj_weight_0, x = block_0_ffn_rmsnorm)[name = string("block_0_ffn_inproj")]; tensor block_0_ffn_g_weight_0 = const()[name = string("block_0_ffn_g_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285317568)))]; tensor block_0_ffn_g_strides_0 = const()[name = string("block_0_ffn_g_strides_0"), val = tensor([1])]; string block_0_ffn_g_pad_type_0 = const()[name = string("block_0_ffn_g_pad_type_0"), val = string("valid")]; tensor block_0_ffn_g_pad_0 = const()[name = string("block_0_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_0_ffn_g_dilations_0 = const()[name = string("block_0_ffn_g_dilations_0"), val = tensor([1])]; int32 block_0_ffn_g_groups_0 = const()[name = string("block_0_ffn_g_groups_0"), val = int32(1)]; tensor block_0_ffn_g = conv(dilations = block_0_ffn_g_dilations_0, groups = block_0_ffn_g_groups_0, pad = block_0_ffn_g_pad_0, pad_type = block_0_ffn_g_pad_type_0, strides = block_0_ffn_g_strides_0, weight = block_0_ffn_g_weight_0, x = block_0_ffn_rmsnorm)[name = string("block_0_ffn_g")]; tensor block_0_ffn_g_activation = silu(x = block_0_ffn_g)[name = string("block_0_ffn_g_activation")]; tensor block_0_ffn_x_gated = mul(x = block_0_ffn_inproj, y = block_0_ffn_g_activation)[name = string("block_0_ffn_x_gated")]; tensor block_0_ffn_outproj_weight_0 = const()[name = string("block_0_ffn_outproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294033920)))]; tensor block_0_ffn_outproj_strides_0 = const()[name = string("block_0_ffn_outproj_strides_0"), val = tensor([1])]; string block_0_ffn_outproj_pad_type_0 = const()[name = string("block_0_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_0_ffn_outproj_pad_0 = const()[name = string("block_0_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_0_ffn_outproj_dilations_0 = const()[name = string("block_0_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_0_ffn_outproj_groups_0 = const()[name = string("block_0_ffn_outproj_groups_0"), val = int32(1)]; tensor block_0_ffn_outproj = conv(dilations = block_0_ffn_outproj_dilations_0, groups = block_0_ffn_outproj_groups_0, pad = block_0_ffn_outproj_pad_0, pad_type = block_0_ffn_outproj_pad_type_0, strides = block_0_ffn_outproj_strides_0, weight = block_0_ffn_outproj_weight_0, x = block_0_ffn_x_gated)[name = string("block_0_ffn_outproj")]; tensor block_0_residual_2 = add(x = block_0_ffn_outproj, y = block_0_residual_1)[name = string("block_0_residual_2")]; tensor 
block_1_attention_rmsnorm_abs = abs(x = block_0_residual_2)[name = string("block_1_attention_rmsnorm_abs")]; tensor block_1_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_1_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_1_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_1_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_1_attention_rmsnorm_maxval = reduce_max(axes = block_1_attention_rmsnorm_maxval_axes_0, keep_dims = block_1_attention_rmsnorm_maxval_keep_dims_0, x = block_1_attention_rmsnorm_abs)[name = string("block_1_attention_rmsnorm_maxval")]; fp16 block_1_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_1_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_1_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_1_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_1_attention_rmsnorm_maxval_clipped = clip(alpha = block_1_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_1_attention_rmsnorm_maxval_clipped_beta_0, x = block_1_attention_rmsnorm_maxval)[name = string("block_1_attention_rmsnorm_maxval_clipped")]; tensor block_1_attention_rmsnorm_scaled = real_div(x = block_0_residual_2, y = block_1_attention_rmsnorm_maxval_clipped)[name = string("block_1_attention_rmsnorm_scaled")]; tensor block_1_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_1_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_1_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_1_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_1_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_1_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_1_attention_rmsnorm_squared_sum_keep_dims_0, x = block_1_attention_rmsnorm_scaled)[name = string("block_1_attention_rmsnorm_squared_sum")]; fp16 block_1_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_1_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_1_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_1_attention_rmsnorm_rsqrt_epsilon_0, x = block_1_attention_rmsnorm_squared_sum)[name = string("block_1_attention_rmsnorm_rsqrt")]; fp16 block_1_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_1_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_1_attention_rmsnorm_dim_scaled = mul(x = block_1_attention_rmsnorm_scaled, y = block_1_attention_rmsnorm_dim_scaled_y_0)[name = string("block_1_attention_rmsnorm_dim_scaled")]; tensor block_1_attention_rmsnorm_normalized = mul(x = block_1_attention_rmsnorm_dim_scaled, y = block_1_attention_rmsnorm_rsqrt)[name = string("block_1_attention_rmsnorm_normalized")]; tensor block_1_attention_rmsnorm_y_0 = const()[name = string("block_1_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302750272)))]; tensor block_1_attention_rmsnorm = mul(x = block_1_attention_rmsnorm_normalized, y = block_1_attention_rmsnorm_y_0)[name = string("block_1_attention_rmsnorm")]; tensor attention_1_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302752128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303526336))))[name = string("attention_1_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_0 = 
constexpr_blockwise_shift_scale(data = attention_1_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303563264))))[name = string("constexpr_blockwise_shift_scale_0")]; tensor attention_1_qkvproj_bias_0 = const()[name = string("attention_1_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303565632)))]; tensor attention_1_qkvproj_strides_0 = const()[name = string("attention_1_qkvproj_strides_0"), val = tensor([1])]; string attention_1_qkvproj_pad_type_0 = const()[name = string("attention_1_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_1_qkvproj_pad_0 = const()[name = string("attention_1_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_1_qkvproj_dilations_0 = const()[name = string("attention_1_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_1_qkvproj_groups_0 = const()[name = string("attention_1_qkvproj_groups_0"), val = int32(1)]; tensor attention_1_qkvproj = conv(bias = attention_1_qkvproj_bias_0, dilations = attention_1_qkvproj_dilations_0, groups = attention_1_qkvproj_groups_0, pad = attention_1_qkvproj_pad_0, pad_type = attention_1_qkvproj_pad_type_0, strides = attention_1_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_0, x = block_1_attention_rmsnorm)[name = string("attention_1_qkvproj")]; tensor attention_1_head_reshape_shape_0 = const()[name = string("attention_1_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_1_head_reshape = reshape(shape = attention_1_head_reshape_shape_0, x = attention_1_qkvproj)[name = string("attention_1_head_reshape")]; tensor attention_1_head_transpose_perm_0 = const()[name = string("attention_1_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_1_split_qkv_heads_axis_0 = const()[name = string("attention_1_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_1_split_qkv_heads_split_sizes_0 = const()[name = string("attention_1_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_1_head_transpose = transpose(perm = attention_1_head_transpose_perm_0, x = attention_1_head_reshape)[name = string("transpose_46")]; tensor attention_1_split_qkv_heads_0, tensor attention_1_split_qkv_heads_1, tensor attention_1_split_qkv_heads_2 = split(axis = attention_1_split_qkv_heads_axis_0, split_sizes = attention_1_split_qkv_heads_split_sizes_0, x = attention_1_head_transpose)[name = string("attention_1_split_qkv_heads")]; tensor attention_1_q_rope_lhs_mult = mul(x = attention_1_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_1_q_rope_lhs_mult")]; int32 attention_1_q_rotate_half_split_num_splits_0 = const()[name = string("attention_1_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_1_q_rotate_half_split_axis_0 = const()[name = string("attention_1_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_1_q_rotate_half_split_0, tensor attention_1_q_rotate_half_split_1 = split(axis = attention_1_q_rotate_half_split_axis_0, num_splits = attention_1_q_rotate_half_split_num_splits_0, x = attention_1_split_qkv_heads_0)[name = string("attention_1_q_rotate_half_split")]; fp16 attention_1_q_rotate_half_neg_y_0 = const()[name = string("attention_1_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_1_q_rotate_half_neg = mul(x = attention_1_q_rotate_half_split_1, y = attention_1_q_rotate_half_neg_y_0)[name = string("attention_1_q_rotate_half_neg")]; int32 attention_1_q_rotate_half_concat_axis_0 
= const()[name = string("attention_1_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_1_q_rotate_half_concat_interleave_0 = const()[name = string("attention_1_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_1_q_rotate_half_concat = concat(axis = attention_1_q_rotate_half_concat_axis_0, interleave = attention_1_q_rotate_half_concat_interleave_0, values = (attention_1_q_rotate_half_neg, attention_1_q_rotate_half_split_0))[name = string("attention_1_q_rotate_half_concat")]; tensor attention_1_q_rope_rhs_mult = mul(x = attention_1_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_1_q_rope_rhs_mult")]; tensor attention_1_q_rope = add(x = attention_1_q_rope_lhs_mult, y = attention_1_q_rope_rhs_mult)[name = string("attention_1_q_rope")]; tensor attention_1_k_rope_lhs_mult = mul(x = attention_1_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_1_k_rope_lhs_mult")]; int32 attention_1_k_rotate_half_split_num_splits_0 = const()[name = string("attention_1_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_1_k_rotate_half_split_axis_0 = const()[name = string("attention_1_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_1_k_rotate_half_split_0, tensor attention_1_k_rotate_half_split_1 = split(axis = attention_1_k_rotate_half_split_axis_0, num_splits = attention_1_k_rotate_half_split_num_splits_0, x = attention_1_split_qkv_heads_1)[name = string("attention_1_k_rotate_half_split")]; fp16 attention_1_k_rotate_half_neg_y_0 = const()[name = string("attention_1_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_1_k_rotate_half_neg = mul(x = attention_1_k_rotate_half_split_1, y = attention_1_k_rotate_half_neg_y_0)[name = string("attention_1_k_rotate_half_neg")]; int32 attention_1_k_rotate_half_concat_axis_0 = const()[name = string("attention_1_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_1_k_rotate_half_concat_interleave_0 = const()[name = string("attention_1_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_1_k_rotate_half_concat = concat(axis = attention_1_k_rotate_half_concat_axis_0, interleave = attention_1_k_rotate_half_concat_interleave_0, values = (attention_1_k_rotate_half_neg, attention_1_k_rotate_half_split_0))[name = string("attention_1_k_rotate_half_concat")]; tensor attention_1_k_rope_rhs_mult = mul(x = attention_1_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_1_k_rope_rhs_mult")]; tensor attention_1_k_rope = add(x = attention_1_k_rope_lhs_mult, y = attention_1_k_rope_rhs_mult)[name = string("attention_1_k_rope")]; int32 attention_1_q_splits_axis_0 = const()[name = string("attention_1_q_splits_axis_0"), val = int32(1)]; int32 attention_1_q_splits_num_splits_0 = const()[name = string("attention_1_q_splits_num_splits_0"), val = int32(2)]; tensor attention_1_q_splits_0, tensor attention_1_q_splits_1 = split(axis = attention_1_q_splits_axis_0, num_splits = attention_1_q_splits_num_splits_0, x = attention_1_q_rope)[name = string("attention_1_q_splits")]; tensor attention_1_update_begin_0_values0_0 = const()[name = string("attention_1_update_begin_0_values0_0"), val = tensor([1])]; tensor attention_1_update_begin_0_values1_0 = const()[name = string("attention_1_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_1_update_begin_0_values3_0 = const()[name = string("attention_1_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_1_update_begin_0_axis_0 = const()[name = 
string("attention_1_update_begin_0_axis_0"), val = int32(0)]; bool attention_1_update_begin_0_interleave_0 = const()[name = string("attention_1_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_1_update_begin_0 = concat(axis = attention_1_update_begin_0_axis_0, interleave = attention_1_update_begin_0_interleave_0, values = (attention_1_update_begin_0_values0_0, attention_1_update_begin_0_values1_0, query_pos1, attention_1_update_begin_0_values3_0))[name = string("attention_1_update_begin_0")]; tensor attention_1_update_end_0_values0_0 = const()[name = string("attention_1_update_end_0_values0_0"), val = tensor([2])]; tensor attention_1_update_end_0_values1_0 = const()[name = string("attention_1_update_end_0_values1_0"), val = tensor([2])]; tensor attention_1_update_end_0_values3_0 = const()[name = string("attention_1_update_end_0_values3_0"), val = tensor([64])]; int32 attention_1_update_end_0_axis_0 = const()[name = string("attention_1_update_end_0_axis_0"), val = int32(0)]; bool attention_1_update_end_0_interleave_0 = const()[name = string("attention_1_update_end_0_interleave_0"), val = bool(false)]; tensor attention_1_update_end_0 = concat(axis = attention_1_update_end_0_axis_0, interleave = attention_1_update_end_0_interleave_0, values = (attention_1_update_end_0_values0_0, attention_1_update_end_0_values1_0, end_pos_0, attention_1_update_end_0_values3_0))[name = string("attention_1_update_end_0")]; tensor attention_1_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_1_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_1_updated_key_cache_0 = slice_update(begin = attention_1_update_begin_0, end = attention_1_update_end_0, squeeze_mask = attention_1_updated_key_cache_0_squeeze_mask_0, update = attention_1_k_rope, x = coreml_update_state_0)[name = string("attention_1_updated_key_cache_0")]; write_state(data = attention_1_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_2_write_state")]; tensor coreml_update_state_2 = read_state(input = key_cache_state)[name = string("coreml_update_state_2")]; tensor attention_1_key_cache_begin_0 = const()[name = string("attention_1_key_cache_begin_0"), val = tensor([1, 0, 0, 0])]; tensor attention_1_key_cache_end_0 = const()[name = string("attention_1_key_cache_end_0"), val = tensor([2, 2, 512, 64])]; tensor attention_1_key_cache_squeeze_mask_0 = const()[name = string("attention_1_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_1_key_cache = slice_by_index(begin = attention_1_key_cache_begin_0, end = attention_1_key_cache_end_0, squeeze_mask = attention_1_key_cache_squeeze_mask_0, x = coreml_update_state_2)[name = string("attention_1_key_cache")]; int32 attention_1_key_cache_head_axis_0 = const()[name = string("attention_1_key_cache_head_axis_0"), val = int32(1)]; int32 attention_1_key_cache_head_num_splits_0 = const()[name = string("attention_1_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_1_key_cache_head_0, tensor attention_1_key_cache_head_1 = split(axis = attention_1_key_cache_head_axis_0, num_splits = attention_1_key_cache_head_num_splits_0, x = attention_1_key_cache)[name = string("attention_1_key_cache_head")]; tensor attention_1_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_1_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_1_updated_value_cache_0 = slice_update(begin = 
attention_1_update_begin_0, end = attention_1_update_end_0, squeeze_mask = attention_1_updated_value_cache_0_squeeze_mask_0, update = attention_1_split_qkv_heads_2, x = coreml_update_state_1)[name = string("attention_1_updated_value_cache_0")]; write_state(data = attention_1_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_3_write_state")]; tensor coreml_update_state_3 = read_state(input = value_cache_state)[name = string("coreml_update_state_3")]; tensor attention_1_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_1_slice_current_layer_value_cache_begin_0"), val = tensor([1, 0, 0, 0])]; tensor attention_1_slice_current_layer_value_cache_end_0 = const()[name = string("attention_1_slice_current_layer_value_cache_end_0"), val = tensor([2, 2, 512, 64])]; tensor attention_1_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_1_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_1_slice_current_layer_value_cache = slice_by_index(begin = attention_1_slice_current_layer_value_cache_begin_0, end = attention_1_slice_current_layer_value_cache_end_0, squeeze_mask = attention_1_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_3)[name = string("attention_1_slice_current_layer_value_cache")]; int32 attention_1_slice_value_cache_heads_axis_0 = const()[name = string("attention_1_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_1_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_1_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_1_slice_value_cache_heads_0, tensor attention_1_slice_value_cache_heads_1 = split(axis = attention_1_slice_value_cache_heads_axis_0, num_splits = attention_1_slice_value_cache_heads_num_splits_0, x = attention_1_slice_current_layer_value_cache)[name = string("attention_1_slice_value_cache_heads")]; bool attention_1_scores_0_transpose_y_0 = const()[name = string("attention_1_scores_0_transpose_y_0"), val = bool(true)]; bool attention_1_scores_0_transpose_x_0 = const()[name = string("attention_1_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_1_scores_0 = matmul(transpose_x = attention_1_scores_0_transpose_x_0, transpose_y = attention_1_scores_0_transpose_y_0, x = attention_1_key_cache_head_0, y = attention_1_q_splits_0)[name = string("attention_1_scores_0")]; fp16 attention_1_scaled_scores_0_y_0 = const()[name = string("attention_1_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_1_scaled_scores_0 = mul(x = attention_1_scores_0, y = attention_1_scaled_scores_0_y_0)[name = string("attention_1_scaled_scores_0")]; tensor attention_1_masked_scaled_scores_0 = add(x = attention_1_scaled_scores_0, y = transpose_0)[name = string("attention_1_masked_scaled_scores_0")]; int32 softmax_2_axis_0 = const()[name = string("softmax_2_axis_0"), val = int32(-2)]; tensor softmax_2 = softmax(axis = softmax_2_axis_0, x = attention_1_masked_scaled_scores_0)[name = string("softmax_2")]; bool attention_1_attention_0_transpose_x_0 = const()[name = string("attention_1_attention_0_transpose_x_0"), val = bool(true)]; bool attention_1_attention_0_transpose_y_0 = const()[name = string("attention_1_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_1_attention_0 = matmul(transpose_x = attention_1_attention_0_transpose_x_0, transpose_y = attention_1_attention_0_transpose_y_0, x = softmax_2, y = attention_1_slice_value_cache_heads_0)[name 
= string("attention_1_attention_0")]; bool attention_1_scores_1_transpose_y_0 = const()[name = string("attention_1_scores_1_transpose_y_0"), val = bool(true)]; bool attention_1_scores_1_transpose_x_0 = const()[name = string("attention_1_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_1_scores_1 = matmul(transpose_x = attention_1_scores_1_transpose_x_0, transpose_y = attention_1_scores_1_transpose_y_0, x = attention_1_key_cache_head_1, y = attention_1_q_splits_1)[name = string("attention_1_scores_1")]; fp16 attention_1_scaled_scores_1_y_0 = const()[name = string("attention_1_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_1_scaled_scores_1 = mul(x = attention_1_scores_1, y = attention_1_scaled_scores_1_y_0)[name = string("attention_1_scaled_scores_1")]; tensor attention_1_masked_scaled_scores_1 = add(x = attention_1_scaled_scores_1, y = transpose_0)[name = string("attention_1_masked_scaled_scores_1")]; int32 softmax_3_axis_0 = const()[name = string("softmax_3_axis_0"), val = int32(-2)]; tensor softmax_3 = softmax(axis = softmax_3_axis_0, x = attention_1_masked_scaled_scores_1)[name = string("softmax_3")]; bool attention_1_attention_1_transpose_x_0 = const()[name = string("attention_1_attention_1_transpose_x_0"), val = bool(true)]; bool attention_1_attention_1_transpose_y_0 = const()[name = string("attention_1_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_1_attention_1 = matmul(transpose_x = attention_1_attention_1_transpose_x_0, transpose_y = attention_1_attention_1_transpose_y_0, x = softmax_3, y = attention_1_slice_value_cache_heads_1)[name = string("attention_1_attention_1")]; int32 attention_1_concat_attention_all_heads_axis_0 = const()[name = string("attention_1_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_1_concat_attention_all_heads_interleave_0 = const()[name = string("attention_1_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_1_concat_attention_all_heads = concat(axis = attention_1_concat_attention_all_heads_axis_0, interleave = attention_1_concat_attention_all_heads_interleave_0, values = (attention_1_attention_0, attention_1_attention_1))[name = string("attention_1_concat_attention_all_heads")]; tensor attention_1_channels_first_retransposed_perm_0 = const()[name = string("attention_1_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_1_reshaped_shape_0 = const()[name = string("attention_1_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_1_channels_first_retransposed = transpose(perm = attention_1_channels_first_retransposed_perm_0, x = attention_1_concat_attention_all_heads)[name = string("transpose_45")]; tensor attention_1_reshaped = reshape(shape = attention_1_reshaped_shape_0, x = attention_1_channels_first_retransposed)[name = string("attention_1_reshaped")]; tensor attention_1_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303568000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304170176))))[name = string("attention_1_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_1 = constexpr_blockwise_shift_scale(data = attention_1_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304198912))))[name = string("constexpr_blockwise_shift_scale_1")]; tensor attention_1_outproj_strides_0 = const()[name = 
string("attention_1_outproj_strides_0"), val = tensor([1])]; string attention_1_outproj_pad_type_0 = const()[name = string("attention_1_outproj_pad_type_0"), val = string("valid")]; tensor attention_1_outproj_pad_0 = const()[name = string("attention_1_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_1_outproj_dilations_0 = const()[name = string("attention_1_outproj_dilations_0"), val = tensor([1])]; int32 attention_1_outproj_groups_0 = const()[name = string("attention_1_outproj_groups_0"), val = int32(1)]; tensor attention_1_outproj = conv(dilations = attention_1_outproj_dilations_0, groups = attention_1_outproj_groups_0, pad = attention_1_outproj_pad_0, pad_type = attention_1_outproj_pad_type_0, strides = attention_1_outproj_strides_0, weight = constexpr_blockwise_shift_scale_1, x = attention_1_reshaped)[name = string("attention_1_outproj")]; tensor block_1_residual_1 = add(x = block_0_residual_2, y = attention_1_outproj)[name = string("block_1_residual_1")]; tensor block_1_ffn_rmsnorm_abs = abs(x = block_1_residual_1)[name = string("block_1_ffn_rmsnorm_abs")]; tensor block_1_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_1_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_1_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_1_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_1_ffn_rmsnorm_maxval = reduce_max(axes = block_1_ffn_rmsnorm_maxval_axes_0, keep_dims = block_1_ffn_rmsnorm_maxval_keep_dims_0, x = block_1_ffn_rmsnorm_abs)[name = string("block_1_ffn_rmsnorm_maxval")]; fp16 block_1_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_1_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_1_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_1_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_1_ffn_rmsnorm_maxval_clipped = clip(alpha = block_1_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_1_ffn_rmsnorm_maxval_clipped_beta_0, x = block_1_ffn_rmsnorm_maxval)[name = string("block_1_ffn_rmsnorm_maxval_clipped")]; tensor block_1_ffn_rmsnorm_scaled = real_div(x = block_1_residual_1, y = block_1_ffn_rmsnorm_maxval_clipped)[name = string("block_1_ffn_rmsnorm_scaled")]; tensor block_1_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_1_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_1_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_1_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_1_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_1_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_1_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_1_ffn_rmsnorm_scaled)[name = string("block_1_ffn_rmsnorm_squared_sum")]; fp16 block_1_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_1_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_1_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_1_ffn_rmsnorm_rsqrt_epsilon_0, x = block_1_ffn_rmsnorm_squared_sum)[name = string("block_1_ffn_rmsnorm_rsqrt")]; fp16 block_1_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_1_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_1_ffn_rmsnorm_dim_scaled = mul(x = block_1_ffn_rmsnorm_scaled, y = block_1_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_1_ffn_rmsnorm_dim_scaled")]; tensor block_1_ffn_rmsnorm_normalized = mul(x = block_1_ffn_rmsnorm_dim_scaled, y = block_1_ffn_rmsnorm_rsqrt)[name = string("block_1_ffn_rmsnorm_normalized")]; tensor block_1_ffn_rmsnorm_y_0 = const()[name = 
string("block_1_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304200768)))]; tensor block_1_ffn_rmsnorm = mul(x = block_1_ffn_rmsnorm_normalized, y = block_1_ffn_rmsnorm_y_0)[name = string("block_1_ffn_rmsnorm")]; tensor block_1_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304202624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307471296))))[name = string("block_1_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_2 = constexpr_blockwise_shift_scale(data = block_1_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307627008))))[name = string("constexpr_blockwise_shift_scale_2")]; tensor block_1_ffn_inproj_strides_0 = const()[name = string("block_1_ffn_inproj_strides_0"), val = tensor([1])]; string block_1_ffn_inproj_pad_type_0 = const()[name = string("block_1_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_1_ffn_inproj_pad_0 = const()[name = string("block_1_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_1_ffn_inproj_dilations_0 = const()[name = string("block_1_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_1_ffn_inproj_groups_0 = const()[name = string("block_1_ffn_inproj_groups_0"), val = int32(1)]; tensor block_1_ffn_inproj = conv(dilations = block_1_ffn_inproj_dilations_0, groups = block_1_ffn_inproj_groups_0, pad = block_1_ffn_inproj_pad_0, pad_type = block_1_ffn_inproj_pad_type_0, strides = block_1_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_2, x = block_1_ffn_rmsnorm)[name = string("block_1_ffn_inproj")]; tensor block_1_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307636800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(310905472))))[name = string("block_1_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_3 = constexpr_blockwise_shift_scale(data = block_1_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311061184))))[name = string("constexpr_blockwise_shift_scale_3")]; tensor block_1_ffn_g_strides_0 = const()[name = string("block_1_ffn_g_strides_0"), val = tensor([1])]; string block_1_ffn_g_pad_type_0 = const()[name = string("block_1_ffn_g_pad_type_0"), val = string("valid")]; tensor block_1_ffn_g_pad_0 = const()[name = string("block_1_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_1_ffn_g_dilations_0 = const()[name = string("block_1_ffn_g_dilations_0"), val = tensor([1])]; int32 block_1_ffn_g_groups_0 = const()[name = string("block_1_ffn_g_groups_0"), val = int32(1)]; tensor block_1_ffn_g = conv(dilations = block_1_ffn_g_dilations_0, groups = block_1_ffn_g_groups_0, pad = block_1_ffn_g_pad_0, pad_type = block_1_ffn_g_pad_type_0, strides = block_1_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_3, x = block_1_ffn_rmsnorm)[name = string("block_1_ffn_g")]; tensor block_1_ffn_g_activation = silu(x = block_1_ffn_g)[name = string("block_1_ffn_g_activation")]; tensor block_1_ffn_x_gated = mul(x = block_1_ffn_inproj, y = block_1_ffn_g_activation)[name = string("block_1_ffn_x_gated")]; tensor block_1_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(311070976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314339648))))[name = string("block_1_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_4 = constexpr_blockwise_shift_scale(data = block_1_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314368384))))[name = string("constexpr_blockwise_shift_scale_4")]; tensor block_1_ffn_outproj_strides_0 = const()[name = string("block_1_ffn_outproj_strides_0"), val = tensor([1])]; string block_1_ffn_outproj_pad_type_0 = const()[name = string("block_1_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_1_ffn_outproj_pad_0 = const()[name = string("block_1_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_1_ffn_outproj_dilations_0 = const()[name = string("block_1_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_1_ffn_outproj_groups_0 = const()[name = string("block_1_ffn_outproj_groups_0"), val = int32(1)]; tensor block_1_ffn_outproj = conv(dilations = block_1_ffn_outproj_dilations_0, groups = block_1_ffn_outproj_groups_0, pad = block_1_ffn_outproj_pad_0, pad_type = block_1_ffn_outproj_pad_type_0, strides = block_1_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_4, x = block_1_ffn_x_gated)[name = string("block_1_ffn_outproj")]; tensor block_1_residual_2 = add(x = block_1_ffn_outproj, y = block_1_residual_1)[name = string("block_1_residual_2")]; tensor block_2_attention_rmsnorm_abs = abs(x = block_1_residual_2)[name = string("block_2_attention_rmsnorm_abs")]; tensor block_2_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_2_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_2_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_2_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_2_attention_rmsnorm_maxval = reduce_max(axes = block_2_attention_rmsnorm_maxval_axes_0, keep_dims = block_2_attention_rmsnorm_maxval_keep_dims_0, x = block_2_attention_rmsnorm_abs)[name = string("block_2_attention_rmsnorm_maxval")]; fp16 block_2_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_2_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_2_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_2_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_2_attention_rmsnorm_maxval_clipped = clip(alpha = block_2_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_2_attention_rmsnorm_maxval_clipped_beta_0, x = block_2_attention_rmsnorm_maxval)[name = string("block_2_attention_rmsnorm_maxval_clipped")]; tensor block_2_attention_rmsnorm_scaled = real_div(x = block_1_residual_2, y = block_2_attention_rmsnorm_maxval_clipped)[name = string("block_2_attention_rmsnorm_scaled")]; tensor block_2_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_2_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_2_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_2_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_2_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_2_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_2_attention_rmsnorm_squared_sum_keep_dims_0, x = block_2_attention_rmsnorm_scaled)[name = string("block_2_attention_rmsnorm_squared_sum")]; fp16 block_2_attention_rmsnorm_rsqrt_epsilon_0 = 
const()[name = string("block_2_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_2_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_2_attention_rmsnorm_rsqrt_epsilon_0, x = block_2_attention_rmsnorm_squared_sum)[name = string("block_2_attention_rmsnorm_rsqrt")]; fp16 block_2_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_2_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_2_attention_rmsnorm_dim_scaled = mul(x = block_2_attention_rmsnorm_scaled, y = block_2_attention_rmsnorm_dim_scaled_y_0)[name = string("block_2_attention_rmsnorm_dim_scaled")]; tensor block_2_attention_rmsnorm_normalized = mul(x = block_2_attention_rmsnorm_dim_scaled, y = block_2_attention_rmsnorm_rsqrt)[name = string("block_2_attention_rmsnorm_normalized")]; tensor block_2_attention_rmsnorm_y_0 = const()[name = string("block_2_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314370240)))]; tensor block_2_attention_rmsnorm = mul(x = block_2_attention_rmsnorm_normalized, y = block_2_attention_rmsnorm_y_0)[name = string("block_2_attention_rmsnorm")]; tensor attention_2_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314372096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315146304))))[name = string("attention_2_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_5 = constexpr_blockwise_shift_scale(data = attention_2_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315183232))))[name = string("constexpr_blockwise_shift_scale_5")]; tensor attention_2_qkvproj_bias_0 = const()[name = string("attention_2_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315185600)))]; tensor attention_2_qkvproj_strides_0 = const()[name = string("attention_2_qkvproj_strides_0"), val = tensor([1])]; string attention_2_qkvproj_pad_type_0 = const()[name = string("attention_2_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_2_qkvproj_pad_0 = const()[name = string("attention_2_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_2_qkvproj_dilations_0 = const()[name = string("attention_2_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_2_qkvproj_groups_0 = const()[name = string("attention_2_qkvproj_groups_0"), val = int32(1)]; tensor attention_2_qkvproj = conv(bias = attention_2_qkvproj_bias_0, dilations = attention_2_qkvproj_dilations_0, groups = attention_2_qkvproj_groups_0, pad = attention_2_qkvproj_pad_0, pad_type = attention_2_qkvproj_pad_type_0, strides = attention_2_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_5, x = block_2_attention_rmsnorm)[name = string("attention_2_qkvproj")]; tensor attention_2_head_reshape_shape_0 = const()[name = string("attention_2_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_2_head_reshape = reshape(shape = attention_2_head_reshape_shape_0, x = attention_2_qkvproj)[name = string("attention_2_head_reshape")]; tensor attention_2_head_transpose_perm_0 = const()[name = string("attention_2_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_2_split_qkv_heads_axis_0 = const()[name = string("attention_2_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_2_split_qkv_heads_split_sizes_0 = const()[name = 
string("attention_2_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_2_head_transpose = transpose(perm = attention_2_head_transpose_perm_0, x = attention_2_head_reshape)[name = string("transpose_44")]; tensor attention_2_split_qkv_heads_0, tensor attention_2_split_qkv_heads_1, tensor attention_2_split_qkv_heads_2 = split(axis = attention_2_split_qkv_heads_axis_0, split_sizes = attention_2_split_qkv_heads_split_sizes_0, x = attention_2_head_transpose)[name = string("attention_2_split_qkv_heads")]; tensor attention_2_q_rope_lhs_mult = mul(x = attention_2_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_2_q_rope_lhs_mult")]; int32 attention_2_q_rotate_half_split_num_splits_0 = const()[name = string("attention_2_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_2_q_rotate_half_split_axis_0 = const()[name = string("attention_2_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_2_q_rotate_half_split_0, tensor attention_2_q_rotate_half_split_1 = split(axis = attention_2_q_rotate_half_split_axis_0, num_splits = attention_2_q_rotate_half_split_num_splits_0, x = attention_2_split_qkv_heads_0)[name = string("attention_2_q_rotate_half_split")]; fp16 attention_2_q_rotate_half_neg_y_0 = const()[name = string("attention_2_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_2_q_rotate_half_neg = mul(x = attention_2_q_rotate_half_split_1, y = attention_2_q_rotate_half_neg_y_0)[name = string("attention_2_q_rotate_half_neg")]; int32 attention_2_q_rotate_half_concat_axis_0 = const()[name = string("attention_2_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_2_q_rotate_half_concat_interleave_0 = const()[name = string("attention_2_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_2_q_rotate_half_concat = concat(axis = attention_2_q_rotate_half_concat_axis_0, interleave = attention_2_q_rotate_half_concat_interleave_0, values = (attention_2_q_rotate_half_neg, attention_2_q_rotate_half_split_0))[name = string("attention_2_q_rotate_half_concat")]; tensor attention_2_q_rope_rhs_mult = mul(x = attention_2_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_2_q_rope_rhs_mult")]; tensor attention_2_q_rope = add(x = attention_2_q_rope_lhs_mult, y = attention_2_q_rope_rhs_mult)[name = string("attention_2_q_rope")]; tensor attention_2_k_rope_lhs_mult = mul(x = attention_2_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_2_k_rope_lhs_mult")]; int32 attention_2_k_rotate_half_split_num_splits_0 = const()[name = string("attention_2_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_2_k_rotate_half_split_axis_0 = const()[name = string("attention_2_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_2_k_rotate_half_split_0, tensor attention_2_k_rotate_half_split_1 = split(axis = attention_2_k_rotate_half_split_axis_0, num_splits = attention_2_k_rotate_half_split_num_splits_0, x = attention_2_split_qkv_heads_1)[name = string("attention_2_k_rotate_half_split")]; fp16 attention_2_k_rotate_half_neg_y_0 = const()[name = string("attention_2_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_2_k_rotate_half_neg = mul(x = attention_2_k_rotate_half_split_1, y = attention_2_k_rotate_half_neg_y_0)[name = string("attention_2_k_rotate_half_neg")]; int32 attention_2_k_rotate_half_concat_axis_0 = const()[name = string("attention_2_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_2_k_rotate_half_concat_interleave_0 = const()[name = 
string("attention_2_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_2_k_rotate_half_concat = concat(axis = attention_2_k_rotate_half_concat_axis_0, interleave = attention_2_k_rotate_half_concat_interleave_0, values = (attention_2_k_rotate_half_neg, attention_2_k_rotate_half_split_0))[name = string("attention_2_k_rotate_half_concat")]; tensor attention_2_k_rope_rhs_mult = mul(x = attention_2_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_2_k_rope_rhs_mult")]; tensor attention_2_k_rope = add(x = attention_2_k_rope_lhs_mult, y = attention_2_k_rope_rhs_mult)[name = string("attention_2_k_rope")]; int32 attention_2_q_splits_axis_0 = const()[name = string("attention_2_q_splits_axis_0"), val = int32(1)]; int32 attention_2_q_splits_num_splits_0 = const()[name = string("attention_2_q_splits_num_splits_0"), val = int32(2)]; tensor attention_2_q_splits_0, tensor attention_2_q_splits_1 = split(axis = attention_2_q_splits_axis_0, num_splits = attention_2_q_splits_num_splits_0, x = attention_2_q_rope)[name = string("attention_2_q_splits")]; tensor attention_2_update_begin_0_values0_0 = const()[name = string("attention_2_update_begin_0_values0_0"), val = tensor([2])]; tensor attention_2_update_begin_0_values1_0 = const()[name = string("attention_2_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_2_update_begin_0_values3_0 = const()[name = string("attention_2_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_2_update_begin_0_axis_0 = const()[name = string("attention_2_update_begin_0_axis_0"), val = int32(0)]; bool attention_2_update_begin_0_interleave_0 = const()[name = string("attention_2_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_2_update_begin_0 = concat(axis = attention_2_update_begin_0_axis_0, interleave = attention_2_update_begin_0_interleave_0, values = (attention_2_update_begin_0_values0_0, attention_2_update_begin_0_values1_0, query_pos1, attention_2_update_begin_0_values3_0))[name = string("attention_2_update_begin_0")]; tensor attention_2_update_end_0_values0_0 = const()[name = string("attention_2_update_end_0_values0_0"), val = tensor([3])]; tensor attention_2_update_end_0_values1_0 = const()[name = string("attention_2_update_end_0_values1_0"), val = tensor([2])]; tensor attention_2_update_end_0_values3_0 = const()[name = string("attention_2_update_end_0_values3_0"), val = tensor([64])]; int32 attention_2_update_end_0_axis_0 = const()[name = string("attention_2_update_end_0_axis_0"), val = int32(0)]; bool attention_2_update_end_0_interleave_0 = const()[name = string("attention_2_update_end_0_interleave_0"), val = bool(false)]; tensor attention_2_update_end_0 = concat(axis = attention_2_update_end_0_axis_0, interleave = attention_2_update_end_0_interleave_0, values = (attention_2_update_end_0_values0_0, attention_2_update_end_0_values1_0, end_pos_0, attention_2_update_end_0_values3_0))[name = string("attention_2_update_end_0")]; tensor attention_2_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_2_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_2_updated_key_cache_0 = slice_update(begin = attention_2_update_begin_0, end = attention_2_update_end_0, squeeze_mask = attention_2_updated_key_cache_0_squeeze_mask_0, update = attention_2_k_rope, x = coreml_update_state_2)[name = string("attention_2_updated_key_cache_0")]; write_state(data = attention_2_updated_key_cache_0, input = key_cache_state)[name = 
string("coreml_update_state_4_write_state")]; tensor coreml_update_state_4 = read_state(input = key_cache_state)[name = string("coreml_update_state_4")]; tensor attention_2_key_cache_begin_0 = const()[name = string("attention_2_key_cache_begin_0"), val = tensor([2, 0, 0, 0])]; tensor attention_2_key_cache_end_0 = const()[name = string("attention_2_key_cache_end_0"), val = tensor([3, 2, 512, 64])]; tensor attention_2_key_cache_squeeze_mask_0 = const()[name = string("attention_2_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_2_key_cache = slice_by_index(begin = attention_2_key_cache_begin_0, end = attention_2_key_cache_end_0, squeeze_mask = attention_2_key_cache_squeeze_mask_0, x = coreml_update_state_4)[name = string("attention_2_key_cache")]; int32 attention_2_key_cache_head_axis_0 = const()[name = string("attention_2_key_cache_head_axis_0"), val = int32(1)]; int32 attention_2_key_cache_head_num_splits_0 = const()[name = string("attention_2_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_2_key_cache_head_0, tensor attention_2_key_cache_head_1 = split(axis = attention_2_key_cache_head_axis_0, num_splits = attention_2_key_cache_head_num_splits_0, x = attention_2_key_cache)[name = string("attention_2_key_cache_head")]; tensor attention_2_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_2_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_2_updated_value_cache_0 = slice_update(begin = attention_2_update_begin_0, end = attention_2_update_end_0, squeeze_mask = attention_2_updated_value_cache_0_squeeze_mask_0, update = attention_2_split_qkv_heads_2, x = coreml_update_state_3)[name = string("attention_2_updated_value_cache_0")]; write_state(data = attention_2_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_5_write_state")]; tensor coreml_update_state_5 = read_state(input = value_cache_state)[name = string("coreml_update_state_5")]; tensor attention_2_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_2_slice_current_layer_value_cache_begin_0"), val = tensor([2, 0, 0, 0])]; tensor attention_2_slice_current_layer_value_cache_end_0 = const()[name = string("attention_2_slice_current_layer_value_cache_end_0"), val = tensor([3, 2, 512, 64])]; tensor attention_2_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_2_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_2_slice_current_layer_value_cache = slice_by_index(begin = attention_2_slice_current_layer_value_cache_begin_0, end = attention_2_slice_current_layer_value_cache_end_0, squeeze_mask = attention_2_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_5)[name = string("attention_2_slice_current_layer_value_cache")]; int32 attention_2_slice_value_cache_heads_axis_0 = const()[name = string("attention_2_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_2_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_2_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_2_slice_value_cache_heads_0, tensor attention_2_slice_value_cache_heads_1 = split(axis = attention_2_slice_value_cache_heads_axis_0, num_splits = attention_2_slice_value_cache_heads_num_splits_0, x = attention_2_slice_current_layer_value_cache)[name = string("attention_2_slice_value_cache_heads")]; bool attention_2_scores_0_transpose_y_0 
= const()[name = string("attention_2_scores_0_transpose_y_0"), val = bool(true)]; bool attention_2_scores_0_transpose_x_0 = const()[name = string("attention_2_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_2_scores_0 = matmul(transpose_x = attention_2_scores_0_transpose_x_0, transpose_y = attention_2_scores_0_transpose_y_0, x = attention_2_key_cache_head_0, y = attention_2_q_splits_0)[name = string("attention_2_scores_0")]; fp16 attention_2_scaled_scores_0_y_0 = const()[name = string("attention_2_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_2_scaled_scores_0 = mul(x = attention_2_scores_0, y = attention_2_scaled_scores_0_y_0)[name = string("attention_2_scaled_scores_0")]; tensor attention_2_masked_scaled_scores_0 = add(x = attention_2_scaled_scores_0, y = transpose_0)[name = string("attention_2_masked_scaled_scores_0")]; int32 softmax_4_axis_0 = const()[name = string("softmax_4_axis_0"), val = int32(-2)]; tensor softmax_4 = softmax(axis = softmax_4_axis_0, x = attention_2_masked_scaled_scores_0)[name = string("softmax_4")]; bool attention_2_attention_0_transpose_x_0 = const()[name = string("attention_2_attention_0_transpose_x_0"), val = bool(true)]; bool attention_2_attention_0_transpose_y_0 = const()[name = string("attention_2_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_2_attention_0 = matmul(transpose_x = attention_2_attention_0_transpose_x_0, transpose_y = attention_2_attention_0_transpose_y_0, x = softmax_4, y = attention_2_slice_value_cache_heads_0)[name = string("attention_2_attention_0")]; bool attention_2_scores_1_transpose_y_0 = const()[name = string("attention_2_scores_1_transpose_y_0"), val = bool(true)]; bool attention_2_scores_1_transpose_x_0 = const()[name = string("attention_2_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_2_scores_1 = matmul(transpose_x = attention_2_scores_1_transpose_x_0, transpose_y = attention_2_scores_1_transpose_y_0, x = attention_2_key_cache_head_1, y = attention_2_q_splits_1)[name = string("attention_2_scores_1")]; fp16 attention_2_scaled_scores_1_y_0 = const()[name = string("attention_2_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_2_scaled_scores_1 = mul(x = attention_2_scores_1, y = attention_2_scaled_scores_1_y_0)[name = string("attention_2_scaled_scores_1")]; tensor attention_2_masked_scaled_scores_1 = add(x = attention_2_scaled_scores_1, y = transpose_0)[name = string("attention_2_masked_scaled_scores_1")]; int32 softmax_5_axis_0 = const()[name = string("softmax_5_axis_0"), val = int32(-2)]; tensor softmax_5 = softmax(axis = softmax_5_axis_0, x = attention_2_masked_scaled_scores_1)[name = string("softmax_5")]; bool attention_2_attention_1_transpose_x_0 = const()[name = string("attention_2_attention_1_transpose_x_0"), val = bool(true)]; bool attention_2_attention_1_transpose_y_0 = const()[name = string("attention_2_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_2_attention_1 = matmul(transpose_x = attention_2_attention_1_transpose_x_0, transpose_y = attention_2_attention_1_transpose_y_0, x = softmax_5, y = attention_2_slice_value_cache_heads_1)[name = string("attention_2_attention_1")]; int32 attention_2_concat_attention_all_heads_axis_0 = const()[name = string("attention_2_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_2_concat_attention_all_heads_interleave_0 = const()[name = string("attention_2_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_2_concat_attention_all_heads = concat(axis = 
attention_2_concat_attention_all_heads_axis_0, interleave = attention_2_concat_attention_all_heads_interleave_0, values = (attention_2_attention_0, attention_2_attention_1))[name = string("attention_2_concat_attention_all_heads")]; tensor attention_2_channels_first_retransposed_perm_0 = const()[name = string("attention_2_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_2_reshaped_shape_0 = const()[name = string("attention_2_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_2_channels_first_retransposed = transpose(perm = attention_2_channels_first_retransposed_perm_0, x = attention_2_concat_attention_all_heads)[name = string("transpose_43")]; tensor attention_2_reshaped = reshape(shape = attention_2_reshaped_shape_0, x = attention_2_channels_first_retransposed)[name = string("attention_2_reshaped")]; tensor attention_2_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315187968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315790144))))[name = string("attention_2_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_6 = constexpr_blockwise_shift_scale(data = attention_2_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315818880))))[name = string("constexpr_blockwise_shift_scale_6")]; tensor attention_2_outproj_strides_0 = const()[name = string("attention_2_outproj_strides_0"), val = tensor([1])]; string attention_2_outproj_pad_type_0 = const()[name = string("attention_2_outproj_pad_type_0"), val = string("valid")]; tensor attention_2_outproj_pad_0 = const()[name = string("attention_2_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_2_outproj_dilations_0 = const()[name = string("attention_2_outproj_dilations_0"), val = tensor([1])]; int32 attention_2_outproj_groups_0 = const()[name = string("attention_2_outproj_groups_0"), val = int32(1)]; tensor attention_2_outproj = conv(dilations = attention_2_outproj_dilations_0, groups = attention_2_outproj_groups_0, pad = attention_2_outproj_pad_0, pad_type = attention_2_outproj_pad_type_0, strides = attention_2_outproj_strides_0, weight = constexpr_blockwise_shift_scale_6, x = attention_2_reshaped)[name = string("attention_2_outproj")]; tensor block_2_residual_1 = add(x = block_1_residual_2, y = attention_2_outproj)[name = string("block_2_residual_1")]; tensor block_2_ffn_rmsnorm_abs = abs(x = block_2_residual_1)[name = string("block_2_ffn_rmsnorm_abs")]; tensor block_2_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_2_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_2_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_2_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_2_ffn_rmsnorm_maxval = reduce_max(axes = block_2_ffn_rmsnorm_maxval_axes_0, keep_dims = block_2_ffn_rmsnorm_maxval_keep_dims_0, x = block_2_ffn_rmsnorm_abs)[name = string("block_2_ffn_rmsnorm_maxval")]; fp16 block_2_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_2_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_2_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_2_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_2_ffn_rmsnorm_maxval_clipped = clip(alpha = block_2_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_2_ffn_rmsnorm_maxval_clipped_beta_0, x = 
block_2_ffn_rmsnorm_maxval)[name = string("block_2_ffn_rmsnorm_maxval_clipped")]; tensor block_2_ffn_rmsnorm_scaled = real_div(x = block_2_residual_1, y = block_2_ffn_rmsnorm_maxval_clipped)[name = string("block_2_ffn_rmsnorm_scaled")]; tensor block_2_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_2_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_2_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_2_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_2_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_2_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_2_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_2_ffn_rmsnorm_scaled)[name = string("block_2_ffn_rmsnorm_squared_sum")]; fp16 block_2_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_2_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_2_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_2_ffn_rmsnorm_rsqrt_epsilon_0, x = block_2_ffn_rmsnorm_squared_sum)[name = string("block_2_ffn_rmsnorm_rsqrt")]; fp16 block_2_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_2_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_2_ffn_rmsnorm_dim_scaled = mul(x = block_2_ffn_rmsnorm_scaled, y = block_2_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_2_ffn_rmsnorm_dim_scaled")]; tensor block_2_ffn_rmsnorm_normalized = mul(x = block_2_ffn_rmsnorm_dim_scaled, y = block_2_ffn_rmsnorm_rsqrt)[name = string("block_2_ffn_rmsnorm_normalized")]; tensor block_2_ffn_rmsnorm_y_0 = const()[name = string("block_2_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315820736)))]; tensor block_2_ffn_rmsnorm = mul(x = block_2_ffn_rmsnorm_normalized, y = block_2_ffn_rmsnorm_y_0)[name = string("block_2_ffn_rmsnorm")]; tensor block_2_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315822592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319091264))))[name = string("block_2_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_7 = constexpr_blockwise_shift_scale(data = block_2_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319246976))))[name = string("constexpr_blockwise_shift_scale_7")]; tensor block_2_ffn_inproj_strides_0 = const()[name = string("block_2_ffn_inproj_strides_0"), val = tensor([1])]; string block_2_ffn_inproj_pad_type_0 = const()[name = string("block_2_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_2_ffn_inproj_pad_0 = const()[name = string("block_2_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_2_ffn_inproj_dilations_0 = const()[name = string("block_2_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_2_ffn_inproj_groups_0 = const()[name = string("block_2_ffn_inproj_groups_0"), val = int32(1)]; tensor block_2_ffn_inproj = conv(dilations = block_2_ffn_inproj_dilations_0, groups = block_2_ffn_inproj_groups_0, pad = block_2_ffn_inproj_pad_0, pad_type = block_2_ffn_inproj_pad_type_0, strides = block_2_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_7, x = block_2_ffn_rmsnorm)[name = string("block_2_ffn_inproj")]; tensor block_2_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319256768))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(322525440))))[name = string("block_2_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_8 = constexpr_blockwise_shift_scale(data = block_2_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322681152))))[name = string("constexpr_blockwise_shift_scale_8")]; tensor block_2_ffn_g_strides_0 = const()[name = string("block_2_ffn_g_strides_0"), val = tensor([1])]; string block_2_ffn_g_pad_type_0 = const()[name = string("block_2_ffn_g_pad_type_0"), val = string("valid")]; tensor block_2_ffn_g_pad_0 = const()[name = string("block_2_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_2_ffn_g_dilations_0 = const()[name = string("block_2_ffn_g_dilations_0"), val = tensor([1])]; int32 block_2_ffn_g_groups_0 = const()[name = string("block_2_ffn_g_groups_0"), val = int32(1)]; tensor block_2_ffn_g = conv(dilations = block_2_ffn_g_dilations_0, groups = block_2_ffn_g_groups_0, pad = block_2_ffn_g_pad_0, pad_type = block_2_ffn_g_pad_type_0, strides = block_2_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_8, x = block_2_ffn_rmsnorm)[name = string("block_2_ffn_g")]; tensor block_2_ffn_g_activation = silu(x = block_2_ffn_g)[name = string("block_2_ffn_g_activation")]; tensor block_2_ffn_x_gated = mul(x = block_2_ffn_inproj, y = block_2_ffn_g_activation)[name = string("block_2_ffn_x_gated")]; tensor block_2_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322690944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325959616))))[name = string("block_2_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_9 = constexpr_blockwise_shift_scale(data = block_2_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325988352))))[name = string("constexpr_blockwise_shift_scale_9")]; tensor block_2_ffn_outproj_strides_0 = const()[name = string("block_2_ffn_outproj_strides_0"), val = tensor([1])]; string block_2_ffn_outproj_pad_type_0 = const()[name = string("block_2_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_2_ffn_outproj_pad_0 = const()[name = string("block_2_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_2_ffn_outproj_dilations_0 = const()[name = string("block_2_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_2_ffn_outproj_groups_0 = const()[name = string("block_2_ffn_outproj_groups_0"), val = int32(1)]; tensor block_2_ffn_outproj = conv(dilations = block_2_ffn_outproj_dilations_0, groups = block_2_ffn_outproj_groups_0, pad = block_2_ffn_outproj_pad_0, pad_type = block_2_ffn_outproj_pad_type_0, strides = block_2_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_9, x = block_2_ffn_x_gated)[name = string("block_2_ffn_outproj")]; tensor block_2_residual_2 = add(x = block_2_ffn_outproj, y = block_2_residual_1)[name = string("block_2_residual_2")]; tensor block_3_attention_rmsnorm_abs = abs(x = block_2_residual_2)[name = string("block_3_attention_rmsnorm_abs")]; tensor block_3_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_3_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_3_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_3_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_3_attention_rmsnorm_maxval = reduce_max(axes 
= block_3_attention_rmsnorm_maxval_axes_0, keep_dims = block_3_attention_rmsnorm_maxval_keep_dims_0, x = block_3_attention_rmsnorm_abs)[name = string("block_3_attention_rmsnorm_maxval")]; fp16 block_3_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_3_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_3_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_3_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_3_attention_rmsnorm_maxval_clipped = clip(alpha = block_3_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_3_attention_rmsnorm_maxval_clipped_beta_0, x = block_3_attention_rmsnorm_maxval)[name = string("block_3_attention_rmsnorm_maxval_clipped")]; tensor block_3_attention_rmsnorm_scaled = real_div(x = block_2_residual_2, y = block_3_attention_rmsnorm_maxval_clipped)[name = string("block_3_attention_rmsnorm_scaled")]; tensor block_3_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_3_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_3_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_3_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_3_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_3_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_3_attention_rmsnorm_squared_sum_keep_dims_0, x = block_3_attention_rmsnorm_scaled)[name = string("block_3_attention_rmsnorm_squared_sum")]; fp16 block_3_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_3_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_3_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_3_attention_rmsnorm_rsqrt_epsilon_0, x = block_3_attention_rmsnorm_squared_sum)[name = string("block_3_attention_rmsnorm_rsqrt")]; fp16 block_3_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_3_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_3_attention_rmsnorm_dim_scaled = mul(x = block_3_attention_rmsnorm_scaled, y = block_3_attention_rmsnorm_dim_scaled_y_0)[name = string("block_3_attention_rmsnorm_dim_scaled")]; tensor block_3_attention_rmsnorm_normalized = mul(x = block_3_attention_rmsnorm_dim_scaled, y = block_3_attention_rmsnorm_rsqrt)[name = string("block_3_attention_rmsnorm_normalized")]; tensor block_3_attention_rmsnorm_y_0 = const()[name = string("block_3_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325990208)))]; tensor block_3_attention_rmsnorm = mul(x = block_3_attention_rmsnorm_normalized, y = block_3_attention_rmsnorm_y_0)[name = string("block_3_attention_rmsnorm")]; tensor attention_3_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325992064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326766272))))[name = string("attention_3_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_10 = constexpr_blockwise_shift_scale(data = attention_3_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326803200))))[name = string("constexpr_blockwise_shift_scale_10")]; tensor attention_3_qkvproj_bias_0 = const()[name = string("attention_3_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326805568)))]; tensor 
attention_3_qkvproj_strides_0 = const()[name = string("attention_3_qkvproj_strides_0"), val = tensor([1])]; string attention_3_qkvproj_pad_type_0 = const()[name = string("attention_3_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_3_qkvproj_pad_0 = const()[name = string("attention_3_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_3_qkvproj_dilations_0 = const()[name = string("attention_3_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_3_qkvproj_groups_0 = const()[name = string("attention_3_qkvproj_groups_0"), val = int32(1)]; tensor attention_3_qkvproj = conv(bias = attention_3_qkvproj_bias_0, dilations = attention_3_qkvproj_dilations_0, groups = attention_3_qkvproj_groups_0, pad = attention_3_qkvproj_pad_0, pad_type = attention_3_qkvproj_pad_type_0, strides = attention_3_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_10, x = block_3_attention_rmsnorm)[name = string("attention_3_qkvproj")]; tensor attention_3_head_reshape_shape_0 = const()[name = string("attention_3_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_3_head_reshape = reshape(shape = attention_3_head_reshape_shape_0, x = attention_3_qkvproj)[name = string("attention_3_head_reshape")]; tensor attention_3_head_transpose_perm_0 = const()[name = string("attention_3_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_3_split_qkv_heads_axis_0 = const()[name = string("attention_3_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_3_split_qkv_heads_split_sizes_0 = const()[name = string("attention_3_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_3_head_transpose = transpose(perm = attention_3_head_transpose_perm_0, x = attention_3_head_reshape)[name = string("transpose_42")]; tensor attention_3_split_qkv_heads_0, tensor attention_3_split_qkv_heads_1, tensor attention_3_split_qkv_heads_2 = split(axis = attention_3_split_qkv_heads_axis_0, split_sizes = attention_3_split_qkv_heads_split_sizes_0, x = attention_3_head_transpose)[name = string("attention_3_split_qkv_heads")]; tensor attention_3_q_rope_lhs_mult = mul(x = attention_3_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_3_q_rope_lhs_mult")]; int32 attention_3_q_rotate_half_split_num_splits_0 = const()[name = string("attention_3_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_3_q_rotate_half_split_axis_0 = const()[name = string("attention_3_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_3_q_rotate_half_split_0, tensor attention_3_q_rotate_half_split_1 = split(axis = attention_3_q_rotate_half_split_axis_0, num_splits = attention_3_q_rotate_half_split_num_splits_0, x = attention_3_split_qkv_heads_0)[name = string("attention_3_q_rotate_half_split")]; fp16 attention_3_q_rotate_half_neg_y_0 = const()[name = string("attention_3_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_3_q_rotate_half_neg = mul(x = attention_3_q_rotate_half_split_1, y = attention_3_q_rotate_half_neg_y_0)[name = string("attention_3_q_rotate_half_neg")]; int32 attention_3_q_rotate_half_concat_axis_0 = const()[name = string("attention_3_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_3_q_rotate_half_concat_interleave_0 = const()[name = string("attention_3_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_3_q_rotate_half_concat = concat(axis = attention_3_q_rotate_half_concat_axis_0, interleave = attention_3_q_rotate_half_concat_interleave_0, values = (attention_3_q_rotate_half_neg, 
attention_3_q_rotate_half_split_0))[name = string("attention_3_q_rotate_half_concat")]; tensor attention_3_q_rope_rhs_mult = mul(x = attention_3_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_3_q_rope_rhs_mult")]; tensor attention_3_q_rope = add(x = attention_3_q_rope_lhs_mult, y = attention_3_q_rope_rhs_mult)[name = string("attention_3_q_rope")]; tensor attention_3_k_rope_lhs_mult = mul(x = attention_3_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_3_k_rope_lhs_mult")]; int32 attention_3_k_rotate_half_split_num_splits_0 = const()[name = string("attention_3_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_3_k_rotate_half_split_axis_0 = const()[name = string("attention_3_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_3_k_rotate_half_split_0, tensor attention_3_k_rotate_half_split_1 = split(axis = attention_3_k_rotate_half_split_axis_0, num_splits = attention_3_k_rotate_half_split_num_splits_0, x = attention_3_split_qkv_heads_1)[name = string("attention_3_k_rotate_half_split")]; fp16 attention_3_k_rotate_half_neg_y_0 = const()[name = string("attention_3_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_3_k_rotate_half_neg = mul(x = attention_3_k_rotate_half_split_1, y = attention_3_k_rotate_half_neg_y_0)[name = string("attention_3_k_rotate_half_neg")]; int32 attention_3_k_rotate_half_concat_axis_0 = const()[name = string("attention_3_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_3_k_rotate_half_concat_interleave_0 = const()[name = string("attention_3_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_3_k_rotate_half_concat = concat(axis = attention_3_k_rotate_half_concat_axis_0, interleave = attention_3_k_rotate_half_concat_interleave_0, values = (attention_3_k_rotate_half_neg, attention_3_k_rotate_half_split_0))[name = string("attention_3_k_rotate_half_concat")]; tensor attention_3_k_rope_rhs_mult = mul(x = attention_3_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_3_k_rope_rhs_mult")]; tensor attention_3_k_rope = add(x = attention_3_k_rope_lhs_mult, y = attention_3_k_rope_rhs_mult)[name = string("attention_3_k_rope")]; int32 attention_3_q_splits_axis_0 = const()[name = string("attention_3_q_splits_axis_0"), val = int32(1)]; int32 attention_3_q_splits_num_splits_0 = const()[name = string("attention_3_q_splits_num_splits_0"), val = int32(2)]; tensor attention_3_q_splits_0, tensor attention_3_q_splits_1 = split(axis = attention_3_q_splits_axis_0, num_splits = attention_3_q_splits_num_splits_0, x = attention_3_q_rope)[name = string("attention_3_q_splits")]; tensor attention_3_update_begin_0_values0_0 = const()[name = string("attention_3_update_begin_0_values0_0"), val = tensor([3])]; tensor attention_3_update_begin_0_values1_0 = const()[name = string("attention_3_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_3_update_begin_0_values3_0 = const()[name = string("attention_3_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_3_update_begin_0_axis_0 = const()[name = string("attention_3_update_begin_0_axis_0"), val = int32(0)]; bool attention_3_update_begin_0_interleave_0 = const()[name = string("attention_3_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_3_update_begin_0 = concat(axis = attention_3_update_begin_0_axis_0, interleave = attention_3_update_begin_0_interleave_0, values = (attention_3_update_begin_0_values0_0, attention_3_update_begin_0_values1_0, query_pos1, 
attention_3_update_begin_0_values3_0))[name = string("attention_3_update_begin_0")]; tensor attention_3_update_end_0_values0_0 = const()[name = string("attention_3_update_end_0_values0_0"), val = tensor([4])]; tensor attention_3_update_end_0_values1_0 = const()[name = string("attention_3_update_end_0_values1_0"), val = tensor([2])]; tensor attention_3_update_end_0_values3_0 = const()[name = string("attention_3_update_end_0_values3_0"), val = tensor([64])]; int32 attention_3_update_end_0_axis_0 = const()[name = string("attention_3_update_end_0_axis_0"), val = int32(0)]; bool attention_3_update_end_0_interleave_0 = const()[name = string("attention_3_update_end_0_interleave_0"), val = bool(false)]; tensor attention_3_update_end_0 = concat(axis = attention_3_update_end_0_axis_0, interleave = attention_3_update_end_0_interleave_0, values = (attention_3_update_end_0_values0_0, attention_3_update_end_0_values1_0, end_pos_0, attention_3_update_end_0_values3_0))[name = string("attention_3_update_end_0")]; tensor attention_3_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_3_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_3_updated_key_cache_0 = slice_update(begin = attention_3_update_begin_0, end = attention_3_update_end_0, squeeze_mask = attention_3_updated_key_cache_0_squeeze_mask_0, update = attention_3_k_rope, x = coreml_update_state_4)[name = string("attention_3_updated_key_cache_0")]; write_state(data = attention_3_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_6_write_state")]; tensor coreml_update_state_6 = read_state(input = key_cache_state)[name = string("coreml_update_state_6")]; tensor attention_3_key_cache_begin_0 = const()[name = string("attention_3_key_cache_begin_0"), val = tensor([3, 0, 0, 0])]; tensor attention_3_key_cache_end_0 = const()[name = string("attention_3_key_cache_end_0"), val = tensor([4, 2, 512, 64])]; tensor attention_3_key_cache_squeeze_mask_0 = const()[name = string("attention_3_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_3_key_cache = slice_by_index(begin = attention_3_key_cache_begin_0, end = attention_3_key_cache_end_0, squeeze_mask = attention_3_key_cache_squeeze_mask_0, x = coreml_update_state_6)[name = string("attention_3_key_cache")]; int32 attention_3_key_cache_head_axis_0 = const()[name = string("attention_3_key_cache_head_axis_0"), val = int32(1)]; int32 attention_3_key_cache_head_num_splits_0 = const()[name = string("attention_3_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_3_key_cache_head_0, tensor attention_3_key_cache_head_1 = split(axis = attention_3_key_cache_head_axis_0, num_splits = attention_3_key_cache_head_num_splits_0, x = attention_3_key_cache)[name = string("attention_3_key_cache_head")]; tensor attention_3_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_3_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_3_updated_value_cache_0 = slice_update(begin = attention_3_update_begin_0, end = attention_3_update_end_0, squeeze_mask = attention_3_updated_value_cache_0_squeeze_mask_0, update = attention_3_split_qkv_heads_2, x = coreml_update_state_5)[name = string("attention_3_updated_value_cache_0")]; write_state(data = attention_3_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_7_write_state")]; tensor coreml_update_state_7 = read_state(input = value_cache_state)[name = 
string("coreml_update_state_7")]; tensor attention_3_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_3_slice_current_layer_value_cache_begin_0"), val = tensor([3, 0, 0, 0])]; tensor attention_3_slice_current_layer_value_cache_end_0 = const()[name = string("attention_3_slice_current_layer_value_cache_end_0"), val = tensor([4, 2, 512, 64])]; tensor attention_3_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_3_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_3_slice_current_layer_value_cache = slice_by_index(begin = attention_3_slice_current_layer_value_cache_begin_0, end = attention_3_slice_current_layer_value_cache_end_0, squeeze_mask = attention_3_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_7)[name = string("attention_3_slice_current_layer_value_cache")]; int32 attention_3_slice_value_cache_heads_axis_0 = const()[name = string("attention_3_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_3_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_3_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_3_slice_value_cache_heads_0, tensor attention_3_slice_value_cache_heads_1 = split(axis = attention_3_slice_value_cache_heads_axis_0, num_splits = attention_3_slice_value_cache_heads_num_splits_0, x = attention_3_slice_current_layer_value_cache)[name = string("attention_3_slice_value_cache_heads")]; bool attention_3_scores_0_transpose_y_0 = const()[name = string("attention_3_scores_0_transpose_y_0"), val = bool(true)]; bool attention_3_scores_0_transpose_x_0 = const()[name = string("attention_3_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_3_scores_0 = matmul(transpose_x = attention_3_scores_0_transpose_x_0, transpose_y = attention_3_scores_0_transpose_y_0, x = attention_3_key_cache_head_0, y = attention_3_q_splits_0)[name = string("attention_3_scores_0")]; fp16 attention_3_scaled_scores_0_y_0 = const()[name = string("attention_3_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_3_scaled_scores_0 = mul(x = attention_3_scores_0, y = attention_3_scaled_scores_0_y_0)[name = string("attention_3_scaled_scores_0")]; tensor attention_3_masked_scaled_scores_0 = add(x = attention_3_scaled_scores_0, y = transpose_0)[name = string("attention_3_masked_scaled_scores_0")]; int32 softmax_6_axis_0 = const()[name = string("softmax_6_axis_0"), val = int32(-2)]; tensor softmax_6 = softmax(axis = softmax_6_axis_0, x = attention_3_masked_scaled_scores_0)[name = string("softmax_6")]; bool attention_3_attention_0_transpose_x_0 = const()[name = string("attention_3_attention_0_transpose_x_0"), val = bool(true)]; bool attention_3_attention_0_transpose_y_0 = const()[name = string("attention_3_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_3_attention_0 = matmul(transpose_x = attention_3_attention_0_transpose_x_0, transpose_y = attention_3_attention_0_transpose_y_0, x = softmax_6, y = attention_3_slice_value_cache_heads_0)[name = string("attention_3_attention_0")]; bool attention_3_scores_1_transpose_y_0 = const()[name = string("attention_3_scores_1_transpose_y_0"), val = bool(true)]; bool attention_3_scores_1_transpose_x_0 = const()[name = string("attention_3_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_3_scores_1 = matmul(transpose_x = attention_3_scores_1_transpose_x_0, transpose_y = attention_3_scores_1_transpose_y_0, x = attention_3_key_cache_head_1, y 
= attention_3_q_splits_1)[name = string("attention_3_scores_1")]; fp16 attention_3_scaled_scores_1_y_0 = const()[name = string("attention_3_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_3_scaled_scores_1 = mul(x = attention_3_scores_1, y = attention_3_scaled_scores_1_y_0)[name = string("attention_3_scaled_scores_1")]; tensor attention_3_masked_scaled_scores_1 = add(x = attention_3_scaled_scores_1, y = transpose_0)[name = string("attention_3_masked_scaled_scores_1")]; int32 softmax_7_axis_0 = const()[name = string("softmax_7_axis_0"), val = int32(-2)]; tensor softmax_7 = softmax(axis = softmax_7_axis_0, x = attention_3_masked_scaled_scores_1)[name = string("softmax_7")]; bool attention_3_attention_1_transpose_x_0 = const()[name = string("attention_3_attention_1_transpose_x_0"), val = bool(true)]; bool attention_3_attention_1_transpose_y_0 = const()[name = string("attention_3_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_3_attention_1 = matmul(transpose_x = attention_3_attention_1_transpose_x_0, transpose_y = attention_3_attention_1_transpose_y_0, x = softmax_7, y = attention_3_slice_value_cache_heads_1)[name = string("attention_3_attention_1")]; int32 attention_3_concat_attention_all_heads_axis_0 = const()[name = string("attention_3_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_3_concat_attention_all_heads_interleave_0 = const()[name = string("attention_3_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_3_concat_attention_all_heads = concat(axis = attention_3_concat_attention_all_heads_axis_0, interleave = attention_3_concat_attention_all_heads_interleave_0, values = (attention_3_attention_0, attention_3_attention_1))[name = string("attention_3_concat_attention_all_heads")]; tensor attention_3_channels_first_retransposed_perm_0 = const()[name = string("attention_3_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_3_reshaped_shape_0 = const()[name = string("attention_3_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_3_channels_first_retransposed = transpose(perm = attention_3_channels_first_retransposed_perm_0, x = attention_3_concat_attention_all_heads)[name = string("transpose_41")]; tensor attention_3_reshaped = reshape(shape = attention_3_reshaped_shape_0, x = attention_3_channels_first_retransposed)[name = string("attention_3_reshaped")]; tensor attention_3_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326807936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327410112))))[name = string("attention_3_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_11 = constexpr_blockwise_shift_scale(data = attention_3_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327438848))))[name = string("constexpr_blockwise_shift_scale_11")]; tensor attention_3_outproj_strides_0 = const()[name = string("attention_3_outproj_strides_0"), val = tensor([1])]; string attention_3_outproj_pad_type_0 = const()[name = string("attention_3_outproj_pad_type_0"), val = string("valid")]; tensor attention_3_outproj_pad_0 = const()[name = string("attention_3_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_3_outproj_dilations_0 = const()[name = string("attention_3_outproj_dilations_0"), val = tensor([1])]; int32 attention_3_outproj_groups_0 = 
const()[name = string("attention_3_outproj_groups_0"), val = int32(1)]; tensor attention_3_outproj = conv(dilations = attention_3_outproj_dilations_0, groups = attention_3_outproj_groups_0, pad = attention_3_outproj_pad_0, pad_type = attention_3_outproj_pad_type_0, strides = attention_3_outproj_strides_0, weight = constexpr_blockwise_shift_scale_11, x = attention_3_reshaped)[name = string("attention_3_outproj")]; tensor block_3_residual_1 = add(x = block_2_residual_2, y = attention_3_outproj)[name = string("block_3_residual_1")]; tensor block_3_ffn_rmsnorm_abs = abs(x = block_3_residual_1)[name = string("block_3_ffn_rmsnorm_abs")]; tensor block_3_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_3_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_3_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_3_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_3_ffn_rmsnorm_maxval = reduce_max(axes = block_3_ffn_rmsnorm_maxval_axes_0, keep_dims = block_3_ffn_rmsnorm_maxval_keep_dims_0, x = block_3_ffn_rmsnorm_abs)[name = string("block_3_ffn_rmsnorm_maxval")]; fp16 block_3_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_3_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_3_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_3_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_3_ffn_rmsnorm_maxval_clipped = clip(alpha = block_3_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_3_ffn_rmsnorm_maxval_clipped_beta_0, x = block_3_ffn_rmsnorm_maxval)[name = string("block_3_ffn_rmsnorm_maxval_clipped")]; tensor block_3_ffn_rmsnorm_scaled = real_div(x = block_3_residual_1, y = block_3_ffn_rmsnorm_maxval_clipped)[name = string("block_3_ffn_rmsnorm_scaled")]; tensor block_3_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_3_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_3_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_3_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_3_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_3_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_3_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_3_ffn_rmsnorm_scaled)[name = string("block_3_ffn_rmsnorm_squared_sum")]; fp16 block_3_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_3_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_3_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_3_ffn_rmsnorm_rsqrt_epsilon_0, x = block_3_ffn_rmsnorm_squared_sum)[name = string("block_3_ffn_rmsnorm_rsqrt")]; fp16 block_3_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_3_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_3_ffn_rmsnorm_dim_scaled = mul(x = block_3_ffn_rmsnorm_scaled, y = block_3_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_3_ffn_rmsnorm_dim_scaled")]; tensor block_3_ffn_rmsnorm_normalized = mul(x = block_3_ffn_rmsnorm_dim_scaled, y = block_3_ffn_rmsnorm_rsqrt)[name = string("block_3_ffn_rmsnorm_normalized")]; tensor block_3_ffn_rmsnorm_y_0 = const()[name = string("block_3_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327440704)))]; tensor block_3_ffn_rmsnorm = mul(x = block_3_ffn_rmsnorm_normalized, y = block_3_ffn_rmsnorm_y_0)[name = string("block_3_ffn_rmsnorm")]; tensor block_3_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327442560))), lut = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330711232))))[name = string("block_3_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_12 = constexpr_blockwise_shift_scale(data = block_3_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330866944))))[name = string("constexpr_blockwise_shift_scale_12")]; tensor block_3_ffn_inproj_strides_0 = const()[name = string("block_3_ffn_inproj_strides_0"), val = tensor([1])]; string block_3_ffn_inproj_pad_type_0 = const()[name = string("block_3_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_3_ffn_inproj_pad_0 = const()[name = string("block_3_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_3_ffn_inproj_dilations_0 = const()[name = string("block_3_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_3_ffn_inproj_groups_0 = const()[name = string("block_3_ffn_inproj_groups_0"), val = int32(1)]; tensor block_3_ffn_inproj = conv(dilations = block_3_ffn_inproj_dilations_0, groups = block_3_ffn_inproj_groups_0, pad = block_3_ffn_inproj_pad_0, pad_type = block_3_ffn_inproj_pad_type_0, strides = block_3_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_12, x = block_3_ffn_rmsnorm)[name = string("block_3_ffn_inproj")]; tensor block_3_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330876736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334145408))))[name = string("block_3_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_13 = constexpr_blockwise_shift_scale(data = block_3_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334301120))))[name = string("constexpr_blockwise_shift_scale_13")]; tensor block_3_ffn_g_strides_0 = const()[name = string("block_3_ffn_g_strides_0"), val = tensor([1])]; string block_3_ffn_g_pad_type_0 = const()[name = string("block_3_ffn_g_pad_type_0"), val = string("valid")]; tensor block_3_ffn_g_pad_0 = const()[name = string("block_3_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_3_ffn_g_dilations_0 = const()[name = string("block_3_ffn_g_dilations_0"), val = tensor([1])]; int32 block_3_ffn_g_groups_0 = const()[name = string("block_3_ffn_g_groups_0"), val = int32(1)]; tensor block_3_ffn_g = conv(dilations = block_3_ffn_g_dilations_0, groups = block_3_ffn_g_groups_0, pad = block_3_ffn_g_pad_0, pad_type = block_3_ffn_g_pad_type_0, strides = block_3_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_13, x = block_3_ffn_rmsnorm)[name = string("block_3_ffn_g")]; tensor block_3_ffn_g_activation = silu(x = block_3_ffn_g)[name = string("block_3_ffn_g_activation")]; tensor block_3_ffn_x_gated = mul(x = block_3_ffn_inproj, y = block_3_ffn_g_activation)[name = string("block_3_ffn_x_gated")]; tensor block_3_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334310912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337579584))))[name = string("block_3_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_14 = constexpr_blockwise_shift_scale(data = block_3_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(337608320))))[name = string("constexpr_blockwise_shift_scale_14")]; tensor block_3_ffn_outproj_strides_0 = const()[name = string("block_3_ffn_outproj_strides_0"), val = tensor([1])]; string block_3_ffn_outproj_pad_type_0 = const()[name = string("block_3_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_3_ffn_outproj_pad_0 = const()[name = string("block_3_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_3_ffn_outproj_dilations_0 = const()[name = string("block_3_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_3_ffn_outproj_groups_0 = const()[name = string("block_3_ffn_outproj_groups_0"), val = int32(1)]; tensor block_3_ffn_outproj = conv(dilations = block_3_ffn_outproj_dilations_0, groups = block_3_ffn_outproj_groups_0, pad = block_3_ffn_outproj_pad_0, pad_type = block_3_ffn_outproj_pad_type_0, strides = block_3_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_14, x = block_3_ffn_x_gated)[name = string("block_3_ffn_outproj")]; tensor block_3_residual_2 = add(x = block_3_ffn_outproj, y = block_3_residual_1)[name = string("block_3_residual_2")]; tensor block_4_attention_rmsnorm_abs = abs(x = block_3_residual_2)[name = string("block_4_attention_rmsnorm_abs")]; tensor block_4_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_4_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_4_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_4_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_4_attention_rmsnorm_maxval = reduce_max(axes = block_4_attention_rmsnorm_maxval_axes_0, keep_dims = block_4_attention_rmsnorm_maxval_keep_dims_0, x = block_4_attention_rmsnorm_abs)[name = string("block_4_attention_rmsnorm_maxval")]; fp16 block_4_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_4_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_4_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_4_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_4_attention_rmsnorm_maxval_clipped = clip(alpha = block_4_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_4_attention_rmsnorm_maxval_clipped_beta_0, x = block_4_attention_rmsnorm_maxval)[name = string("block_4_attention_rmsnorm_maxval_clipped")]; tensor block_4_attention_rmsnorm_scaled = real_div(x = block_3_residual_2, y = block_4_attention_rmsnorm_maxval_clipped)[name = string("block_4_attention_rmsnorm_scaled")]; tensor block_4_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_4_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_4_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_4_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_4_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_4_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_4_attention_rmsnorm_squared_sum_keep_dims_0, x = block_4_attention_rmsnorm_scaled)[name = string("block_4_attention_rmsnorm_squared_sum")]; fp16 block_4_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_4_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_4_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_4_attention_rmsnorm_rsqrt_epsilon_0, x = block_4_attention_rmsnorm_squared_sum)[name = string("block_4_attention_rmsnorm_rsqrt")]; fp16 block_4_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_4_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor 
block_4_attention_rmsnorm_dim_scaled = mul(x = block_4_attention_rmsnorm_scaled, y = block_4_attention_rmsnorm_dim_scaled_y_0)[name = string("block_4_attention_rmsnorm_dim_scaled")]; tensor block_4_attention_rmsnorm_normalized = mul(x = block_4_attention_rmsnorm_dim_scaled, y = block_4_attention_rmsnorm_rsqrt)[name = string("block_4_attention_rmsnorm_normalized")]; tensor block_4_attention_rmsnorm_y_0 = const()[name = string("block_4_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337610176)))]; tensor block_4_attention_rmsnorm = mul(x = block_4_attention_rmsnorm_normalized, y = block_4_attention_rmsnorm_y_0)[name = string("block_4_attention_rmsnorm")]; tensor attention_4_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337612032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338386240))))[name = string("attention_4_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_15 = constexpr_blockwise_shift_scale(data = attention_4_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338423168))))[name = string("constexpr_blockwise_shift_scale_15")]; tensor attention_4_qkvproj_bias_0 = const()[name = string("attention_4_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338425536)))]; tensor attention_4_qkvproj_strides_0 = const()[name = string("attention_4_qkvproj_strides_0"), val = tensor([1])]; string attention_4_qkvproj_pad_type_0 = const()[name = string("attention_4_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_4_qkvproj_pad_0 = const()[name = string("attention_4_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_4_qkvproj_dilations_0 = const()[name = string("attention_4_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_4_qkvproj_groups_0 = const()[name = string("attention_4_qkvproj_groups_0"), val = int32(1)]; tensor attention_4_qkvproj = conv(bias = attention_4_qkvproj_bias_0, dilations = attention_4_qkvproj_dilations_0, groups = attention_4_qkvproj_groups_0, pad = attention_4_qkvproj_pad_0, pad_type = attention_4_qkvproj_pad_type_0, strides = attention_4_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_15, x = block_4_attention_rmsnorm)[name = string("attention_4_qkvproj")]; tensor attention_4_head_reshape_shape_0 = const()[name = string("attention_4_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_4_head_reshape = reshape(shape = attention_4_head_reshape_shape_0, x = attention_4_qkvproj)[name = string("attention_4_head_reshape")]; tensor attention_4_head_transpose_perm_0 = const()[name = string("attention_4_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_4_split_qkv_heads_axis_0 = const()[name = string("attention_4_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_4_split_qkv_heads_split_sizes_0 = const()[name = string("attention_4_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_4_head_transpose = transpose(perm = attention_4_head_transpose_perm_0, x = attention_4_head_reshape)[name = string("transpose_40")]; tensor attention_4_split_qkv_heads_0, tensor attention_4_split_qkv_heads_1, tensor attention_4_split_qkv_heads_2 = split(axis = attention_4_split_qkv_heads_axis_0, split_sizes = attention_4_split_qkv_heads_split_sizes_0, 
x = attention_4_head_transpose)[name = string("attention_4_split_qkv_heads")]; tensor attention_4_q_rope_lhs_mult = mul(x = attention_4_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_4_q_rope_lhs_mult")]; int32 attention_4_q_rotate_half_split_num_splits_0 = const()[name = string("attention_4_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_4_q_rotate_half_split_axis_0 = const()[name = string("attention_4_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_4_q_rotate_half_split_0, tensor attention_4_q_rotate_half_split_1 = split(axis = attention_4_q_rotate_half_split_axis_0, num_splits = attention_4_q_rotate_half_split_num_splits_0, x = attention_4_split_qkv_heads_0)[name = string("attention_4_q_rotate_half_split")]; fp16 attention_4_q_rotate_half_neg_y_0 = const()[name = string("attention_4_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_4_q_rotate_half_neg = mul(x = attention_4_q_rotate_half_split_1, y = attention_4_q_rotate_half_neg_y_0)[name = string("attention_4_q_rotate_half_neg")]; int32 attention_4_q_rotate_half_concat_axis_0 = const()[name = string("attention_4_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_4_q_rotate_half_concat_interleave_0 = const()[name = string("attention_4_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_4_q_rotate_half_concat = concat(axis = attention_4_q_rotate_half_concat_axis_0, interleave = attention_4_q_rotate_half_concat_interleave_0, values = (attention_4_q_rotate_half_neg, attention_4_q_rotate_half_split_0))[name = string("attention_4_q_rotate_half_concat")]; tensor attention_4_q_rope_rhs_mult = mul(x = attention_4_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_4_q_rope_rhs_mult")]; tensor attention_4_q_rope = add(x = attention_4_q_rope_lhs_mult, y = attention_4_q_rope_rhs_mult)[name = string("attention_4_q_rope")]; tensor attention_4_k_rope_lhs_mult = mul(x = attention_4_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_4_k_rope_lhs_mult")]; int32 attention_4_k_rotate_half_split_num_splits_0 = const()[name = string("attention_4_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_4_k_rotate_half_split_axis_0 = const()[name = string("attention_4_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_4_k_rotate_half_split_0, tensor attention_4_k_rotate_half_split_1 = split(axis = attention_4_k_rotate_half_split_axis_0, num_splits = attention_4_k_rotate_half_split_num_splits_0, x = attention_4_split_qkv_heads_1)[name = string("attention_4_k_rotate_half_split")]; fp16 attention_4_k_rotate_half_neg_y_0 = const()[name = string("attention_4_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_4_k_rotate_half_neg = mul(x = attention_4_k_rotate_half_split_1, y = attention_4_k_rotate_half_neg_y_0)[name = string("attention_4_k_rotate_half_neg")]; int32 attention_4_k_rotate_half_concat_axis_0 = const()[name = string("attention_4_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_4_k_rotate_half_concat_interleave_0 = const()[name = string("attention_4_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_4_k_rotate_half_concat = concat(axis = attention_4_k_rotate_half_concat_axis_0, interleave = attention_4_k_rotate_half_concat_interleave_0, values = (attention_4_k_rotate_half_neg, attention_4_k_rotate_half_split_0))[name = string("attention_4_k_rotate_half_concat")]; tensor attention_4_k_rope_rhs_mult = mul(x = attention_4_k_rotate_half_concat, y = 
query_sin_emb)[name = string("attention_4_k_rope_rhs_mult")]; tensor attention_4_k_rope = add(x = attention_4_k_rope_lhs_mult, y = attention_4_k_rope_rhs_mult)[name = string("attention_4_k_rope")]; int32 attention_4_q_splits_axis_0 = const()[name = string("attention_4_q_splits_axis_0"), val = int32(1)]; int32 attention_4_q_splits_num_splits_0 = const()[name = string("attention_4_q_splits_num_splits_0"), val = int32(2)]; tensor attention_4_q_splits_0, tensor attention_4_q_splits_1 = split(axis = attention_4_q_splits_axis_0, num_splits = attention_4_q_splits_num_splits_0, x = attention_4_q_rope)[name = string("attention_4_q_splits")]; tensor attention_4_update_begin_0_values0_0 = const()[name = string("attention_4_update_begin_0_values0_0"), val = tensor([4])]; tensor attention_4_update_begin_0_values1_0 = const()[name = string("attention_4_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_4_update_begin_0_values3_0 = const()[name = string("attention_4_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_4_update_begin_0_axis_0 = const()[name = string("attention_4_update_begin_0_axis_0"), val = int32(0)]; bool attention_4_update_begin_0_interleave_0 = const()[name = string("attention_4_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_4_update_begin_0 = concat(axis = attention_4_update_begin_0_axis_0, interleave = attention_4_update_begin_0_interleave_0, values = (attention_4_update_begin_0_values0_0, attention_4_update_begin_0_values1_0, query_pos1, attention_4_update_begin_0_values3_0))[name = string("attention_4_update_begin_0")]; tensor attention_4_update_end_0_values0_0 = const()[name = string("attention_4_update_end_0_values0_0"), val = tensor([5])]; tensor attention_4_update_end_0_values1_0 = const()[name = string("attention_4_update_end_0_values1_0"), val = tensor([2])]; tensor attention_4_update_end_0_values3_0 = const()[name = string("attention_4_update_end_0_values3_0"), val = tensor([64])]; int32 attention_4_update_end_0_axis_0 = const()[name = string("attention_4_update_end_0_axis_0"), val = int32(0)]; bool attention_4_update_end_0_interleave_0 = const()[name = string("attention_4_update_end_0_interleave_0"), val = bool(false)]; tensor attention_4_update_end_0 = concat(axis = attention_4_update_end_0_axis_0, interleave = attention_4_update_end_0_interleave_0, values = (attention_4_update_end_0_values0_0, attention_4_update_end_0_values1_0, end_pos_0, attention_4_update_end_0_values3_0))[name = string("attention_4_update_end_0")]; tensor attention_4_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_4_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_4_updated_key_cache_0 = slice_update(begin = attention_4_update_begin_0, end = attention_4_update_end_0, squeeze_mask = attention_4_updated_key_cache_0_squeeze_mask_0, update = attention_4_k_rope, x = coreml_update_state_6)[name = string("attention_4_updated_key_cache_0")]; write_state(data = attention_4_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_8_write_state")]; tensor coreml_update_state_8 = read_state(input = key_cache_state)[name = string("coreml_update_state_8")]; tensor attention_4_key_cache_begin_0 = const()[name = string("attention_4_key_cache_begin_0"), val = tensor([4, 0, 0, 0])]; tensor attention_4_key_cache_end_0 = const()[name = string("attention_4_key_cache_end_0"), val = tensor([5, 2, 512, 64])]; tensor attention_4_key_cache_squeeze_mask_0 = const()[name = 
string("attention_4_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_4_key_cache = slice_by_index(begin = attention_4_key_cache_begin_0, end = attention_4_key_cache_end_0, squeeze_mask = attention_4_key_cache_squeeze_mask_0, x = coreml_update_state_8)[name = string("attention_4_key_cache")]; int32 attention_4_key_cache_head_axis_0 = const()[name = string("attention_4_key_cache_head_axis_0"), val = int32(1)]; int32 attention_4_key_cache_head_num_splits_0 = const()[name = string("attention_4_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_4_key_cache_head_0, tensor attention_4_key_cache_head_1 = split(axis = attention_4_key_cache_head_axis_0, num_splits = attention_4_key_cache_head_num_splits_0, x = attention_4_key_cache)[name = string("attention_4_key_cache_head")]; tensor attention_4_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_4_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_4_updated_value_cache_0 = slice_update(begin = attention_4_update_begin_0, end = attention_4_update_end_0, squeeze_mask = attention_4_updated_value_cache_0_squeeze_mask_0, update = attention_4_split_qkv_heads_2, x = coreml_update_state_7)[name = string("attention_4_updated_value_cache_0")]; write_state(data = attention_4_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_9_write_state")]; tensor coreml_update_state_9 = read_state(input = value_cache_state)[name = string("coreml_update_state_9")]; tensor attention_4_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_4_slice_current_layer_value_cache_begin_0"), val = tensor([4, 0, 0, 0])]; tensor attention_4_slice_current_layer_value_cache_end_0 = const()[name = string("attention_4_slice_current_layer_value_cache_end_0"), val = tensor([5, 2, 512, 64])]; tensor attention_4_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_4_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_4_slice_current_layer_value_cache = slice_by_index(begin = attention_4_slice_current_layer_value_cache_begin_0, end = attention_4_slice_current_layer_value_cache_end_0, squeeze_mask = attention_4_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_9)[name = string("attention_4_slice_current_layer_value_cache")]; int32 attention_4_slice_value_cache_heads_axis_0 = const()[name = string("attention_4_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_4_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_4_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_4_slice_value_cache_heads_0, tensor attention_4_slice_value_cache_heads_1 = split(axis = attention_4_slice_value_cache_heads_axis_0, num_splits = attention_4_slice_value_cache_heads_num_splits_0, x = attention_4_slice_current_layer_value_cache)[name = string("attention_4_slice_value_cache_heads")]; bool attention_4_scores_0_transpose_y_0 = const()[name = string("attention_4_scores_0_transpose_y_0"), val = bool(true)]; bool attention_4_scores_0_transpose_x_0 = const()[name = string("attention_4_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_4_scores_0 = matmul(transpose_x = attention_4_scores_0_transpose_x_0, transpose_y = attention_4_scores_0_transpose_y_0, x = attention_4_key_cache_head_0, y = attention_4_q_splits_0)[name = string("attention_4_scores_0")]; fp16 
attention_4_scaled_scores_0_y_0 = const()[name = string("attention_4_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_4_scaled_scores_0 = mul(x = attention_4_scores_0, y = attention_4_scaled_scores_0_y_0)[name = string("attention_4_scaled_scores_0")]; tensor attention_4_masked_scaled_scores_0 = add(x = attention_4_scaled_scores_0, y = transpose_0)[name = string("attention_4_masked_scaled_scores_0")]; int32 softmax_8_axis_0 = const()[name = string("softmax_8_axis_0"), val = int32(-2)]; tensor softmax_8 = softmax(axis = softmax_8_axis_0, x = attention_4_masked_scaled_scores_0)[name = string("softmax_8")]; bool attention_4_attention_0_transpose_x_0 = const()[name = string("attention_4_attention_0_transpose_x_0"), val = bool(true)]; bool attention_4_attention_0_transpose_y_0 = const()[name = string("attention_4_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_4_attention_0 = matmul(transpose_x = attention_4_attention_0_transpose_x_0, transpose_y = attention_4_attention_0_transpose_y_0, x = softmax_8, y = attention_4_slice_value_cache_heads_0)[name = string("attention_4_attention_0")]; bool attention_4_scores_1_transpose_y_0 = const()[name = string("attention_4_scores_1_transpose_y_0"), val = bool(true)]; bool attention_4_scores_1_transpose_x_0 = const()[name = string("attention_4_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_4_scores_1 = matmul(transpose_x = attention_4_scores_1_transpose_x_0, transpose_y = attention_4_scores_1_transpose_y_0, x = attention_4_key_cache_head_1, y = attention_4_q_splits_1)[name = string("attention_4_scores_1")]; fp16 attention_4_scaled_scores_1_y_0 = const()[name = string("attention_4_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_4_scaled_scores_1 = mul(x = attention_4_scores_1, y = attention_4_scaled_scores_1_y_0)[name = string("attention_4_scaled_scores_1")]; tensor attention_4_masked_scaled_scores_1 = add(x = attention_4_scaled_scores_1, y = transpose_0)[name = string("attention_4_masked_scaled_scores_1")]; int32 softmax_9_axis_0 = const()[name = string("softmax_9_axis_0"), val = int32(-2)]; tensor softmax_9 = softmax(axis = softmax_9_axis_0, x = attention_4_masked_scaled_scores_1)[name = string("softmax_9")]; bool attention_4_attention_1_transpose_x_0 = const()[name = string("attention_4_attention_1_transpose_x_0"), val = bool(true)]; bool attention_4_attention_1_transpose_y_0 = const()[name = string("attention_4_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_4_attention_1 = matmul(transpose_x = attention_4_attention_1_transpose_x_0, transpose_y = attention_4_attention_1_transpose_y_0, x = softmax_9, y = attention_4_slice_value_cache_heads_1)[name = string("attention_4_attention_1")]; int32 attention_4_concat_attention_all_heads_axis_0 = const()[name = string("attention_4_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_4_concat_attention_all_heads_interleave_0 = const()[name = string("attention_4_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_4_concat_attention_all_heads = concat(axis = attention_4_concat_attention_all_heads_axis_0, interleave = attention_4_concat_attention_all_heads_interleave_0, values = (attention_4_attention_0, attention_4_attention_1))[name = string("attention_4_concat_attention_all_heads")]; tensor attention_4_channels_first_retransposed_perm_0 = const()[name = string("attention_4_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_4_reshaped_shape_0 = const()[name = 
string("attention_4_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_4_channels_first_retransposed = transpose(perm = attention_4_channels_first_retransposed_perm_0, x = attention_4_concat_attention_all_heads)[name = string("transpose_39")]; tensor attention_4_reshaped = reshape(shape = attention_4_reshaped_shape_0, x = attention_4_channels_first_retransposed)[name = string("attention_4_reshaped")]; tensor attention_4_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338427904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339030080))))[name = string("attention_4_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_16 = constexpr_blockwise_shift_scale(data = attention_4_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339058816))))[name = string("constexpr_blockwise_shift_scale_16")]; tensor attention_4_outproj_strides_0 = const()[name = string("attention_4_outproj_strides_0"), val = tensor([1])]; string attention_4_outproj_pad_type_0 = const()[name = string("attention_4_outproj_pad_type_0"), val = string("valid")]; tensor attention_4_outproj_pad_0 = const()[name = string("attention_4_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_4_outproj_dilations_0 = const()[name = string("attention_4_outproj_dilations_0"), val = tensor([1])]; int32 attention_4_outproj_groups_0 = const()[name = string("attention_4_outproj_groups_0"), val = int32(1)]; tensor attention_4_outproj = conv(dilations = attention_4_outproj_dilations_0, groups = attention_4_outproj_groups_0, pad = attention_4_outproj_pad_0, pad_type = attention_4_outproj_pad_type_0, strides = attention_4_outproj_strides_0, weight = constexpr_blockwise_shift_scale_16, x = attention_4_reshaped)[name = string("attention_4_outproj")]; tensor block_4_residual_1 = add(x = block_3_residual_2, y = attention_4_outproj)[name = string("block_4_residual_1")]; tensor block_4_ffn_rmsnorm_abs = abs(x = block_4_residual_1)[name = string("block_4_ffn_rmsnorm_abs")]; tensor block_4_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_4_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_4_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_4_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_4_ffn_rmsnorm_maxval = reduce_max(axes = block_4_ffn_rmsnorm_maxval_axes_0, keep_dims = block_4_ffn_rmsnorm_maxval_keep_dims_0, x = block_4_ffn_rmsnorm_abs)[name = string("block_4_ffn_rmsnorm_maxval")]; fp16 block_4_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_4_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_4_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_4_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_4_ffn_rmsnorm_maxval_clipped = clip(alpha = block_4_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_4_ffn_rmsnorm_maxval_clipped_beta_0, x = block_4_ffn_rmsnorm_maxval)[name = string("block_4_ffn_rmsnorm_maxval_clipped")]; tensor block_4_ffn_rmsnorm_scaled = real_div(x = block_4_residual_1, y = block_4_ffn_rmsnorm_maxval_clipped)[name = string("block_4_ffn_rmsnorm_scaled")]; tensor block_4_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_4_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_4_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = 
string("block_4_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_4_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_4_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_4_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_4_ffn_rmsnorm_scaled)[name = string("block_4_ffn_rmsnorm_squared_sum")]; fp16 block_4_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_4_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_4_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_4_ffn_rmsnorm_rsqrt_epsilon_0, x = block_4_ffn_rmsnorm_squared_sum)[name = string("block_4_ffn_rmsnorm_rsqrt")]; fp16 block_4_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_4_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_4_ffn_rmsnorm_dim_scaled = mul(x = block_4_ffn_rmsnorm_scaled, y = block_4_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_4_ffn_rmsnorm_dim_scaled")]; tensor block_4_ffn_rmsnorm_normalized = mul(x = block_4_ffn_rmsnorm_dim_scaled, y = block_4_ffn_rmsnorm_rsqrt)[name = string("block_4_ffn_rmsnorm_normalized")]; tensor block_4_ffn_rmsnorm_y_0 = const()[name = string("block_4_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339060672)))]; tensor block_4_ffn_rmsnorm = mul(x = block_4_ffn_rmsnorm_normalized, y = block_4_ffn_rmsnorm_y_0)[name = string("block_4_ffn_rmsnorm")]; tensor block_4_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339062528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342331200))))[name = string("block_4_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_17 = constexpr_blockwise_shift_scale(data = block_4_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342486912))))[name = string("constexpr_blockwise_shift_scale_17")]; tensor block_4_ffn_inproj_strides_0 = const()[name = string("block_4_ffn_inproj_strides_0"), val = tensor([1])]; string block_4_ffn_inproj_pad_type_0 = const()[name = string("block_4_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_4_ffn_inproj_pad_0 = const()[name = string("block_4_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_4_ffn_inproj_dilations_0 = const()[name = string("block_4_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_4_ffn_inproj_groups_0 = const()[name = string("block_4_ffn_inproj_groups_0"), val = int32(1)]; tensor block_4_ffn_inproj = conv(dilations = block_4_ffn_inproj_dilations_0, groups = block_4_ffn_inproj_groups_0, pad = block_4_ffn_inproj_pad_0, pad_type = block_4_ffn_inproj_pad_type_0, strides = block_4_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_17, x = block_4_ffn_rmsnorm)[name = string("block_4_ffn_inproj")]; tensor block_4_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342496704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345765376))))[name = string("block_4_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_18 = constexpr_blockwise_shift_scale(data = block_4_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345921088))))[name = string("constexpr_blockwise_shift_scale_18")]; tensor block_4_ffn_g_strides_0 = 
const()[name = string("block_4_ffn_g_strides_0"), val = tensor([1])]; string block_4_ffn_g_pad_type_0 = const()[name = string("block_4_ffn_g_pad_type_0"), val = string("valid")]; tensor block_4_ffn_g_pad_0 = const()[name = string("block_4_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_4_ffn_g_dilations_0 = const()[name = string("block_4_ffn_g_dilations_0"), val = tensor([1])]; int32 block_4_ffn_g_groups_0 = const()[name = string("block_4_ffn_g_groups_0"), val = int32(1)]; tensor block_4_ffn_g = conv(dilations = block_4_ffn_g_dilations_0, groups = block_4_ffn_g_groups_0, pad = block_4_ffn_g_pad_0, pad_type = block_4_ffn_g_pad_type_0, strides = block_4_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_18, x = block_4_ffn_rmsnorm)[name = string("block_4_ffn_g")]; tensor block_4_ffn_g_activation = silu(x = block_4_ffn_g)[name = string("block_4_ffn_g_activation")]; tensor block_4_ffn_x_gated = mul(x = block_4_ffn_inproj, y = block_4_ffn_g_activation)[name = string("block_4_ffn_x_gated")]; tensor block_4_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345930880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349199552))))[name = string("block_4_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_19 = constexpr_blockwise_shift_scale(data = block_4_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349228288))))[name = string("constexpr_blockwise_shift_scale_19")]; tensor block_4_ffn_outproj_strides_0 = const()[name = string("block_4_ffn_outproj_strides_0"), val = tensor([1])]; string block_4_ffn_outproj_pad_type_0 = const()[name = string("block_4_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_4_ffn_outproj_pad_0 = const()[name = string("block_4_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_4_ffn_outproj_dilations_0 = const()[name = string("block_4_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_4_ffn_outproj_groups_0 = const()[name = string("block_4_ffn_outproj_groups_0"), val = int32(1)]; tensor block_4_ffn_outproj = conv(dilations = block_4_ffn_outproj_dilations_0, groups = block_4_ffn_outproj_groups_0, pad = block_4_ffn_outproj_pad_0, pad_type = block_4_ffn_outproj_pad_type_0, strides = block_4_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_19, x = block_4_ffn_x_gated)[name = string("block_4_ffn_outproj")]; tensor block_4_residual_2 = add(x = block_4_ffn_outproj, y = block_4_residual_1)[name = string("block_4_residual_2")]; tensor block_5_attention_rmsnorm_abs = abs(x = block_4_residual_2)[name = string("block_5_attention_rmsnorm_abs")]; tensor block_5_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_5_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_5_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_5_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_5_attention_rmsnorm_maxval = reduce_max(axes = block_5_attention_rmsnorm_maxval_axes_0, keep_dims = block_5_attention_rmsnorm_maxval_keep_dims_0, x = block_5_attention_rmsnorm_abs)[name = string("block_5_attention_rmsnorm_maxval")]; fp16 block_5_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_5_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_5_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = 
string("block_5_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_5_attention_rmsnorm_maxval_clipped = clip(alpha = block_5_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_5_attention_rmsnorm_maxval_clipped_beta_0, x = block_5_attention_rmsnorm_maxval)[name = string("block_5_attention_rmsnorm_maxval_clipped")]; tensor block_5_attention_rmsnorm_scaled = real_div(x = block_4_residual_2, y = block_5_attention_rmsnorm_maxval_clipped)[name = string("block_5_attention_rmsnorm_scaled")]; tensor block_5_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_5_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_5_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_5_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_5_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_5_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_5_attention_rmsnorm_squared_sum_keep_dims_0, x = block_5_attention_rmsnorm_scaled)[name = string("block_5_attention_rmsnorm_squared_sum")]; fp16 block_5_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_5_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_5_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_5_attention_rmsnorm_rsqrt_epsilon_0, x = block_5_attention_rmsnorm_squared_sum)[name = string("block_5_attention_rmsnorm_rsqrt")]; fp16 block_5_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_5_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_5_attention_rmsnorm_dim_scaled = mul(x = block_5_attention_rmsnorm_scaled, y = block_5_attention_rmsnorm_dim_scaled_y_0)[name = string("block_5_attention_rmsnorm_dim_scaled")]; tensor block_5_attention_rmsnorm_normalized = mul(x = block_5_attention_rmsnorm_dim_scaled, y = block_5_attention_rmsnorm_rsqrt)[name = string("block_5_attention_rmsnorm_normalized")]; tensor block_5_attention_rmsnorm_y_0 = const()[name = string("block_5_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349230144)))]; tensor block_5_attention_rmsnorm = mul(x = block_5_attention_rmsnorm_normalized, y = block_5_attention_rmsnorm_y_0)[name = string("block_5_attention_rmsnorm")]; tensor attention_5_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349232000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350006208))))[name = string("attention_5_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_20 = constexpr_blockwise_shift_scale(data = attention_5_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350043136))))[name = string("constexpr_blockwise_shift_scale_20")]; tensor attention_5_qkvproj_bias_0 = const()[name = string("attention_5_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350045504)))]; tensor attention_5_qkvproj_strides_0 = const()[name = string("attention_5_qkvproj_strides_0"), val = tensor([1])]; string attention_5_qkvproj_pad_type_0 = const()[name = string("attention_5_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_5_qkvproj_pad_0 = const()[name = string("attention_5_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_5_qkvproj_dilations_0 = const()[name = string("attention_5_qkvproj_dilations_0"), 
val = tensor([1])]; int32 attention_5_qkvproj_groups_0 = const()[name = string("attention_5_qkvproj_groups_0"), val = int32(1)]; tensor attention_5_qkvproj = conv(bias = attention_5_qkvproj_bias_0, dilations = attention_5_qkvproj_dilations_0, groups = attention_5_qkvproj_groups_0, pad = attention_5_qkvproj_pad_0, pad_type = attention_5_qkvproj_pad_type_0, strides = attention_5_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_20, x = block_5_attention_rmsnorm)[name = string("attention_5_qkvproj")]; tensor attention_5_head_reshape_shape_0 = const()[name = string("attention_5_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_5_head_reshape = reshape(shape = attention_5_head_reshape_shape_0, x = attention_5_qkvproj)[name = string("attention_5_head_reshape")]; tensor attention_5_head_transpose_perm_0 = const()[name = string("attention_5_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_5_split_qkv_heads_axis_0 = const()[name = string("attention_5_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_5_split_qkv_heads_split_sizes_0 = const()[name = string("attention_5_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_5_head_transpose = transpose(perm = attention_5_head_transpose_perm_0, x = attention_5_head_reshape)[name = string("transpose_38")]; tensor attention_5_split_qkv_heads_0, tensor attention_5_split_qkv_heads_1, tensor attention_5_split_qkv_heads_2 = split(axis = attention_5_split_qkv_heads_axis_0, split_sizes = attention_5_split_qkv_heads_split_sizes_0, x = attention_5_head_transpose)[name = string("attention_5_split_qkv_heads")]; tensor attention_5_q_rope_lhs_mult = mul(x = attention_5_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_5_q_rope_lhs_mult")]; int32 attention_5_q_rotate_half_split_num_splits_0 = const()[name = string("attention_5_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_5_q_rotate_half_split_axis_0 = const()[name = string("attention_5_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_5_q_rotate_half_split_0, tensor attention_5_q_rotate_half_split_1 = split(axis = attention_5_q_rotate_half_split_axis_0, num_splits = attention_5_q_rotate_half_split_num_splits_0, x = attention_5_split_qkv_heads_0)[name = string("attention_5_q_rotate_half_split")]; fp16 attention_5_q_rotate_half_neg_y_0 = const()[name = string("attention_5_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_5_q_rotate_half_neg = mul(x = attention_5_q_rotate_half_split_1, y = attention_5_q_rotate_half_neg_y_0)[name = string("attention_5_q_rotate_half_neg")]; int32 attention_5_q_rotate_half_concat_axis_0 = const()[name = string("attention_5_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_5_q_rotate_half_concat_interleave_0 = const()[name = string("attention_5_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_5_q_rotate_half_concat = concat(axis = attention_5_q_rotate_half_concat_axis_0, interleave = attention_5_q_rotate_half_concat_interleave_0, values = (attention_5_q_rotate_half_neg, attention_5_q_rotate_half_split_0))[name = string("attention_5_q_rotate_half_concat")]; tensor attention_5_q_rope_rhs_mult = mul(x = attention_5_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_5_q_rope_rhs_mult")]; tensor attention_5_q_rope = add(x = attention_5_q_rope_lhs_mult, y = attention_5_q_rope_rhs_mult)[name = string("attention_5_q_rope")]; tensor attention_5_k_rope_lhs_mult = mul(x = attention_5_split_qkv_heads_1, 
y = query_cos_emb)[name = string("attention_5_k_rope_lhs_mult")]; int32 attention_5_k_rotate_half_split_num_splits_0 = const()[name = string("attention_5_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_5_k_rotate_half_split_axis_0 = const()[name = string("attention_5_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_5_k_rotate_half_split_0, tensor attention_5_k_rotate_half_split_1 = split(axis = attention_5_k_rotate_half_split_axis_0, num_splits = attention_5_k_rotate_half_split_num_splits_0, x = attention_5_split_qkv_heads_1)[name = string("attention_5_k_rotate_half_split")]; fp16 attention_5_k_rotate_half_neg_y_0 = const()[name = string("attention_5_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_5_k_rotate_half_neg = mul(x = attention_5_k_rotate_half_split_1, y = attention_5_k_rotate_half_neg_y_0)[name = string("attention_5_k_rotate_half_neg")]; int32 attention_5_k_rotate_half_concat_axis_0 = const()[name = string("attention_5_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_5_k_rotate_half_concat_interleave_0 = const()[name = string("attention_5_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_5_k_rotate_half_concat = concat(axis = attention_5_k_rotate_half_concat_axis_0, interleave = attention_5_k_rotate_half_concat_interleave_0, values = (attention_5_k_rotate_half_neg, attention_5_k_rotate_half_split_0))[name = string("attention_5_k_rotate_half_concat")]; tensor attention_5_k_rope_rhs_mult = mul(x = attention_5_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_5_k_rope_rhs_mult")]; tensor attention_5_k_rope = add(x = attention_5_k_rope_lhs_mult, y = attention_5_k_rope_rhs_mult)[name = string("attention_5_k_rope")]; int32 attention_5_q_splits_axis_0 = const()[name = string("attention_5_q_splits_axis_0"), val = int32(1)]; int32 attention_5_q_splits_num_splits_0 = const()[name = string("attention_5_q_splits_num_splits_0"), val = int32(2)]; tensor attention_5_q_splits_0, tensor attention_5_q_splits_1 = split(axis = attention_5_q_splits_axis_0, num_splits = attention_5_q_splits_num_splits_0, x = attention_5_q_rope)[name = string("attention_5_q_splits")]; tensor attention_5_update_begin_0_values0_0 = const()[name = string("attention_5_update_begin_0_values0_0"), val = tensor([5])]; tensor attention_5_update_begin_0_values1_0 = const()[name = string("attention_5_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_5_update_begin_0_values3_0 = const()[name = string("attention_5_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_5_update_begin_0_axis_0 = const()[name = string("attention_5_update_begin_0_axis_0"), val = int32(0)]; bool attention_5_update_begin_0_interleave_0 = const()[name = string("attention_5_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_5_update_begin_0 = concat(axis = attention_5_update_begin_0_axis_0, interleave = attention_5_update_begin_0_interleave_0, values = (attention_5_update_begin_0_values0_0, attention_5_update_begin_0_values1_0, query_pos1, attention_5_update_begin_0_values3_0))[name = string("attention_5_update_begin_0")]; tensor attention_5_update_end_0_values0_0 = const()[name = string("attention_5_update_end_0_values0_0"), val = tensor([6])]; tensor attention_5_update_end_0_values1_0 = const()[name = string("attention_5_update_end_0_values1_0"), val = tensor([2])]; tensor attention_5_update_end_0_values3_0 = const()[name = string("attention_5_update_end_0_values3_0"), val = tensor([64])]; int32 
attention_5_update_end_0_axis_0 = const()[name = string("attention_5_update_end_0_axis_0"), val = int32(0)]; bool attention_5_update_end_0_interleave_0 = const()[name = string("attention_5_update_end_0_interleave_0"), val = bool(false)]; tensor attention_5_update_end_0 = concat(axis = attention_5_update_end_0_axis_0, interleave = attention_5_update_end_0_interleave_0, values = (attention_5_update_end_0_values0_0, attention_5_update_end_0_values1_0, end_pos_0, attention_5_update_end_0_values3_0))[name = string("attention_5_update_end_0")]; tensor attention_5_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_5_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_5_updated_key_cache_0 = slice_update(begin = attention_5_update_begin_0, end = attention_5_update_end_0, squeeze_mask = attention_5_updated_key_cache_0_squeeze_mask_0, update = attention_5_k_rope, x = coreml_update_state_8)[name = string("attention_5_updated_key_cache_0")]; write_state(data = attention_5_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_10_write_state")]; tensor coreml_update_state_10 = read_state(input = key_cache_state)[name = string("coreml_update_state_10")]; tensor attention_5_key_cache_begin_0 = const()[name = string("attention_5_key_cache_begin_0"), val = tensor([5, 0, 0, 0])]; tensor attention_5_key_cache_end_0 = const()[name = string("attention_5_key_cache_end_0"), val = tensor([6, 2, 512, 64])]; tensor attention_5_key_cache_squeeze_mask_0 = const()[name = string("attention_5_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_5_key_cache = slice_by_index(begin = attention_5_key_cache_begin_0, end = attention_5_key_cache_end_0, squeeze_mask = attention_5_key_cache_squeeze_mask_0, x = coreml_update_state_10)[name = string("attention_5_key_cache")]; int32 attention_5_key_cache_head_axis_0 = const()[name = string("attention_5_key_cache_head_axis_0"), val = int32(1)]; int32 attention_5_key_cache_head_num_splits_0 = const()[name = string("attention_5_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_5_key_cache_head_0, tensor attention_5_key_cache_head_1 = split(axis = attention_5_key_cache_head_axis_0, num_splits = attention_5_key_cache_head_num_splits_0, x = attention_5_key_cache)[name = string("attention_5_key_cache_head")]; tensor attention_5_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_5_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_5_updated_value_cache_0 = slice_update(begin = attention_5_update_begin_0, end = attention_5_update_end_0, squeeze_mask = attention_5_updated_value_cache_0_squeeze_mask_0, update = attention_5_split_qkv_heads_2, x = coreml_update_state_9)[name = string("attention_5_updated_value_cache_0")]; write_state(data = attention_5_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_11_write_state")]; tensor coreml_update_state_11 = read_state(input = value_cache_state)[name = string("coreml_update_state_11")]; tensor attention_5_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_5_slice_current_layer_value_cache_begin_0"), val = tensor([5, 0, 0, 0])]; tensor attention_5_slice_current_layer_value_cache_end_0 = const()[name = string("attention_5_slice_current_layer_value_cache_end_0"), val = tensor([6, 2, 512, 64])]; tensor attention_5_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = 
string("attention_5_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_5_slice_current_layer_value_cache = slice_by_index(begin = attention_5_slice_current_layer_value_cache_begin_0, end = attention_5_slice_current_layer_value_cache_end_0, squeeze_mask = attention_5_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_11)[name = string("attention_5_slice_current_layer_value_cache")]; int32 attention_5_slice_value_cache_heads_axis_0 = const()[name = string("attention_5_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_5_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_5_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_5_slice_value_cache_heads_0, tensor attention_5_slice_value_cache_heads_1 = split(axis = attention_5_slice_value_cache_heads_axis_0, num_splits = attention_5_slice_value_cache_heads_num_splits_0, x = attention_5_slice_current_layer_value_cache)[name = string("attention_5_slice_value_cache_heads")]; bool attention_5_scores_0_transpose_y_0 = const()[name = string("attention_5_scores_0_transpose_y_0"), val = bool(true)]; bool attention_5_scores_0_transpose_x_0 = const()[name = string("attention_5_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_5_scores_0 = matmul(transpose_x = attention_5_scores_0_transpose_x_0, transpose_y = attention_5_scores_0_transpose_y_0, x = attention_5_key_cache_head_0, y = attention_5_q_splits_0)[name = string("attention_5_scores_0")]; fp16 attention_5_scaled_scores_0_y_0 = const()[name = string("attention_5_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_5_scaled_scores_0 = mul(x = attention_5_scores_0, y = attention_5_scaled_scores_0_y_0)[name = string("attention_5_scaled_scores_0")]; tensor attention_5_masked_scaled_scores_0 = add(x = attention_5_scaled_scores_0, y = transpose_0)[name = string("attention_5_masked_scaled_scores_0")]; int32 softmax_10_axis_0 = const()[name = string("softmax_10_axis_0"), val = int32(-2)]; tensor softmax_10 = softmax(axis = softmax_10_axis_0, x = attention_5_masked_scaled_scores_0)[name = string("softmax_10")]; bool attention_5_attention_0_transpose_x_0 = const()[name = string("attention_5_attention_0_transpose_x_0"), val = bool(true)]; bool attention_5_attention_0_transpose_y_0 = const()[name = string("attention_5_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_5_attention_0 = matmul(transpose_x = attention_5_attention_0_transpose_x_0, transpose_y = attention_5_attention_0_transpose_y_0, x = softmax_10, y = attention_5_slice_value_cache_heads_0)[name = string("attention_5_attention_0")]; bool attention_5_scores_1_transpose_y_0 = const()[name = string("attention_5_scores_1_transpose_y_0"), val = bool(true)]; bool attention_5_scores_1_transpose_x_0 = const()[name = string("attention_5_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_5_scores_1 = matmul(transpose_x = attention_5_scores_1_transpose_x_0, transpose_y = attention_5_scores_1_transpose_y_0, x = attention_5_key_cache_head_1, y = attention_5_q_splits_1)[name = string("attention_5_scores_1")]; fp16 attention_5_scaled_scores_1_y_0 = const()[name = string("attention_5_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_5_scaled_scores_1 = mul(x = attention_5_scores_1, y = attention_5_scaled_scores_1_y_0)[name = string("attention_5_scaled_scores_1")]; tensor attention_5_masked_scaled_scores_1 = add(x = attention_5_scaled_scores_1, y = transpose_0)[name = 
string("attention_5_masked_scaled_scores_1")]; int32 softmax_11_axis_0 = const()[name = string("softmax_11_axis_0"), val = int32(-2)]; tensor softmax_11 = softmax(axis = softmax_11_axis_0, x = attention_5_masked_scaled_scores_1)[name = string("softmax_11")]; bool attention_5_attention_1_transpose_x_0 = const()[name = string("attention_5_attention_1_transpose_x_0"), val = bool(true)]; bool attention_5_attention_1_transpose_y_0 = const()[name = string("attention_5_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_5_attention_1 = matmul(transpose_x = attention_5_attention_1_transpose_x_0, transpose_y = attention_5_attention_1_transpose_y_0, x = softmax_11, y = attention_5_slice_value_cache_heads_1)[name = string("attention_5_attention_1")]; int32 attention_5_concat_attention_all_heads_axis_0 = const()[name = string("attention_5_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_5_concat_attention_all_heads_interleave_0 = const()[name = string("attention_5_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_5_concat_attention_all_heads = concat(axis = attention_5_concat_attention_all_heads_axis_0, interleave = attention_5_concat_attention_all_heads_interleave_0, values = (attention_5_attention_0, attention_5_attention_1))[name = string("attention_5_concat_attention_all_heads")]; tensor attention_5_channels_first_retransposed_perm_0 = const()[name = string("attention_5_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_5_reshaped_shape_0 = const()[name = string("attention_5_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_5_channels_first_retransposed = transpose(perm = attention_5_channels_first_retransposed_perm_0, x = attention_5_concat_attention_all_heads)[name = string("transpose_37")]; tensor attention_5_reshaped = reshape(shape = attention_5_reshaped_shape_0, x = attention_5_channels_first_retransposed)[name = string("attention_5_reshaped")]; tensor attention_5_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350047872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350650048))))[name = string("attention_5_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_21 = constexpr_blockwise_shift_scale(data = attention_5_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350678784))))[name = string("constexpr_blockwise_shift_scale_21")]; tensor attention_5_outproj_strides_0 = const()[name = string("attention_5_outproj_strides_0"), val = tensor([1])]; string attention_5_outproj_pad_type_0 = const()[name = string("attention_5_outproj_pad_type_0"), val = string("valid")]; tensor attention_5_outproj_pad_0 = const()[name = string("attention_5_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_5_outproj_dilations_0 = const()[name = string("attention_5_outproj_dilations_0"), val = tensor([1])]; int32 attention_5_outproj_groups_0 = const()[name = string("attention_5_outproj_groups_0"), val = int32(1)]; tensor attention_5_outproj = conv(dilations = attention_5_outproj_dilations_0, groups = attention_5_outproj_groups_0, pad = attention_5_outproj_pad_0, pad_type = attention_5_outproj_pad_type_0, strides = attention_5_outproj_strides_0, weight = constexpr_blockwise_shift_scale_21, x = attention_5_reshaped)[name = string("attention_5_outproj")]; tensor 
block_5_residual_1 = add(x = block_4_residual_2, y = attention_5_outproj)[name = string("block_5_residual_1")]; tensor block_5_ffn_rmsnorm_abs = abs(x = block_5_residual_1)[name = string("block_5_ffn_rmsnorm_abs")]; tensor block_5_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_5_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_5_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_5_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_5_ffn_rmsnorm_maxval = reduce_max(axes = block_5_ffn_rmsnorm_maxval_axes_0, keep_dims = block_5_ffn_rmsnorm_maxval_keep_dims_0, x = block_5_ffn_rmsnorm_abs)[name = string("block_5_ffn_rmsnorm_maxval")]; fp16 block_5_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_5_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_5_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_5_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_5_ffn_rmsnorm_maxval_clipped = clip(alpha = block_5_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_5_ffn_rmsnorm_maxval_clipped_beta_0, x = block_5_ffn_rmsnorm_maxval)[name = string("block_5_ffn_rmsnorm_maxval_clipped")]; tensor block_5_ffn_rmsnorm_scaled = real_div(x = block_5_residual_1, y = block_5_ffn_rmsnorm_maxval_clipped)[name = string("block_5_ffn_rmsnorm_scaled")]; tensor block_5_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_5_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_5_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_5_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_5_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_5_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_5_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_5_ffn_rmsnorm_scaled)[name = string("block_5_ffn_rmsnorm_squared_sum")]; fp16 block_5_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_5_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_5_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_5_ffn_rmsnorm_rsqrt_epsilon_0, x = block_5_ffn_rmsnorm_squared_sum)[name = string("block_5_ffn_rmsnorm_rsqrt")]; fp16 block_5_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_5_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_5_ffn_rmsnorm_dim_scaled = mul(x = block_5_ffn_rmsnorm_scaled, y = block_5_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_5_ffn_rmsnorm_dim_scaled")]; tensor block_5_ffn_rmsnorm_normalized = mul(x = block_5_ffn_rmsnorm_dim_scaled, y = block_5_ffn_rmsnorm_rsqrt)[name = string("block_5_ffn_rmsnorm_normalized")]; tensor block_5_ffn_rmsnorm_y_0 = const()[name = string("block_5_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350680640)))]; tensor block_5_ffn_rmsnorm = mul(x = block_5_ffn_rmsnorm_normalized, y = block_5_ffn_rmsnorm_y_0)[name = string("block_5_ffn_rmsnorm")]; tensor block_5_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350682496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353951168))))[name = string("block_5_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_22 = constexpr_blockwise_shift_scale(data = block_5_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354106880))))[name = 
string("constexpr_blockwise_shift_scale_22")]; tensor block_5_ffn_inproj_strides_0 = const()[name = string("block_5_ffn_inproj_strides_0"), val = tensor([1])]; string block_5_ffn_inproj_pad_type_0 = const()[name = string("block_5_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_5_ffn_inproj_pad_0 = const()[name = string("block_5_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_5_ffn_inproj_dilations_0 = const()[name = string("block_5_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_5_ffn_inproj_groups_0 = const()[name = string("block_5_ffn_inproj_groups_0"), val = int32(1)]; tensor block_5_ffn_inproj = conv(dilations = block_5_ffn_inproj_dilations_0, groups = block_5_ffn_inproj_groups_0, pad = block_5_ffn_inproj_pad_0, pad_type = block_5_ffn_inproj_pad_type_0, strides = block_5_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_22, x = block_5_ffn_rmsnorm)[name = string("block_5_ffn_inproj")]; tensor block_5_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354116672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357385344))))[name = string("block_5_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_23 = constexpr_blockwise_shift_scale(data = block_5_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357541056))))[name = string("constexpr_blockwise_shift_scale_23")]; tensor block_5_ffn_g_strides_0 = const()[name = string("block_5_ffn_g_strides_0"), val = tensor([1])]; string block_5_ffn_g_pad_type_0 = const()[name = string("block_5_ffn_g_pad_type_0"), val = string("valid")]; tensor block_5_ffn_g_pad_0 = const()[name = string("block_5_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_5_ffn_g_dilations_0 = const()[name = string("block_5_ffn_g_dilations_0"), val = tensor([1])]; int32 block_5_ffn_g_groups_0 = const()[name = string("block_5_ffn_g_groups_0"), val = int32(1)]; tensor block_5_ffn_g = conv(dilations = block_5_ffn_g_dilations_0, groups = block_5_ffn_g_groups_0, pad = block_5_ffn_g_pad_0, pad_type = block_5_ffn_g_pad_type_0, strides = block_5_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_23, x = block_5_ffn_rmsnorm)[name = string("block_5_ffn_g")]; tensor block_5_ffn_g_activation = silu(x = block_5_ffn_g)[name = string("block_5_ffn_g_activation")]; tensor block_5_ffn_x_gated = mul(x = block_5_ffn_inproj, y = block_5_ffn_g_activation)[name = string("block_5_ffn_x_gated")]; tensor block_5_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357550848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360819520))))[name = string("block_5_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_24 = constexpr_blockwise_shift_scale(data = block_5_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360848256))))[name = string("constexpr_blockwise_shift_scale_24")]; tensor block_5_ffn_outproj_strides_0 = const()[name = string("block_5_ffn_outproj_strides_0"), val = tensor([1])]; string block_5_ffn_outproj_pad_type_0 = const()[name = string("block_5_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_5_ffn_outproj_pad_0 = const()[name = string("block_5_ffn_outproj_pad_0"), val = 
tensor([0, 0])]; tensor block_5_ffn_outproj_dilations_0 = const()[name = string("block_5_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_5_ffn_outproj_groups_0 = const()[name = string("block_5_ffn_outproj_groups_0"), val = int32(1)]; tensor block_5_ffn_outproj = conv(dilations = block_5_ffn_outproj_dilations_0, groups = block_5_ffn_outproj_groups_0, pad = block_5_ffn_outproj_pad_0, pad_type = block_5_ffn_outproj_pad_type_0, strides = block_5_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_24, x = block_5_ffn_x_gated)[name = string("block_5_ffn_outproj")]; tensor block_5_residual_2 = add(x = block_5_ffn_outproj, y = block_5_residual_1)[name = string("block_5_residual_2")]; tensor block_6_attention_rmsnorm_abs = abs(x = block_5_residual_2)[name = string("block_6_attention_rmsnorm_abs")]; tensor block_6_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_6_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_6_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_6_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_6_attention_rmsnorm_maxval = reduce_max(axes = block_6_attention_rmsnorm_maxval_axes_0, keep_dims = block_6_attention_rmsnorm_maxval_keep_dims_0, x = block_6_attention_rmsnorm_abs)[name = string("block_6_attention_rmsnorm_maxval")]; fp16 block_6_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_6_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_6_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_6_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_6_attention_rmsnorm_maxval_clipped = clip(alpha = block_6_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_6_attention_rmsnorm_maxval_clipped_beta_0, x = block_6_attention_rmsnorm_maxval)[name = string("block_6_attention_rmsnorm_maxval_clipped")]; tensor block_6_attention_rmsnorm_scaled = real_div(x = block_5_residual_2, y = block_6_attention_rmsnorm_maxval_clipped)[name = string("block_6_attention_rmsnorm_scaled")]; tensor block_6_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_6_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_6_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_6_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_6_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_6_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_6_attention_rmsnorm_squared_sum_keep_dims_0, x = block_6_attention_rmsnorm_scaled)[name = string("block_6_attention_rmsnorm_squared_sum")]; fp16 block_6_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_6_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_6_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_6_attention_rmsnorm_rsqrt_epsilon_0, x = block_6_attention_rmsnorm_squared_sum)[name = string("block_6_attention_rmsnorm_rsqrt")]; fp16 block_6_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_6_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_6_attention_rmsnorm_dim_scaled = mul(x = block_6_attention_rmsnorm_scaled, y = block_6_attention_rmsnorm_dim_scaled_y_0)[name = string("block_6_attention_rmsnorm_dim_scaled")]; tensor block_6_attention_rmsnorm_normalized = mul(x = block_6_attention_rmsnorm_dim_scaled, y = block_6_attention_rmsnorm_rsqrt)[name = string("block_6_attention_rmsnorm_normalized")]; tensor block_6_attention_rmsnorm_y_0 = 
const()[name = string("block_6_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360850112)))]; tensor block_6_attention_rmsnorm = mul(x = block_6_attention_rmsnorm_normalized, y = block_6_attention_rmsnorm_y_0)[name = string("block_6_attention_rmsnorm")]; tensor attention_6_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360851968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361626176))))[name = string("attention_6_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_25 = constexpr_blockwise_shift_scale(data = attention_6_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361663104))))[name = string("constexpr_blockwise_shift_scale_25")]; tensor attention_6_qkvproj_bias_0 = const()[name = string("attention_6_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361665472)))]; tensor attention_6_qkvproj_strides_0 = const()[name = string("attention_6_qkvproj_strides_0"), val = tensor([1])]; string attention_6_qkvproj_pad_type_0 = const()[name = string("attention_6_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_6_qkvproj_pad_0 = const()[name = string("attention_6_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_6_qkvproj_dilations_0 = const()[name = string("attention_6_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_6_qkvproj_groups_0 = const()[name = string("attention_6_qkvproj_groups_0"), val = int32(1)]; tensor attention_6_qkvproj = conv(bias = attention_6_qkvproj_bias_0, dilations = attention_6_qkvproj_dilations_0, groups = attention_6_qkvproj_groups_0, pad = attention_6_qkvproj_pad_0, pad_type = attention_6_qkvproj_pad_type_0, strides = attention_6_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_25, x = block_6_attention_rmsnorm)[name = string("attention_6_qkvproj")]; tensor attention_6_head_reshape_shape_0 = const()[name = string("attention_6_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_6_head_reshape = reshape(shape = attention_6_head_reshape_shape_0, x = attention_6_qkvproj)[name = string("attention_6_head_reshape")]; tensor attention_6_head_transpose_perm_0 = const()[name = string("attention_6_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_6_split_qkv_heads_axis_0 = const()[name = string("attention_6_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_6_split_qkv_heads_split_sizes_0 = const()[name = string("attention_6_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_6_head_transpose = transpose(perm = attention_6_head_transpose_perm_0, x = attention_6_head_reshape)[name = string("transpose_36")]; tensor attention_6_split_qkv_heads_0, tensor attention_6_split_qkv_heads_1, tensor attention_6_split_qkv_heads_2 = split(axis = attention_6_split_qkv_heads_axis_0, split_sizes = attention_6_split_qkv_heads_split_sizes_0, x = attention_6_head_transpose)[name = string("attention_6_split_qkv_heads")]; tensor attention_6_q_rope_lhs_mult = mul(x = attention_6_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_6_q_rope_lhs_mult")]; int32 attention_6_q_rotate_half_split_num_splits_0 = const()[name = string("attention_6_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_6_q_rotate_half_split_axis_0 
= const()[name = string("attention_6_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_6_q_rotate_half_split_0, tensor attention_6_q_rotate_half_split_1 = split(axis = attention_6_q_rotate_half_split_axis_0, num_splits = attention_6_q_rotate_half_split_num_splits_0, x = attention_6_split_qkv_heads_0)[name = string("attention_6_q_rotate_half_split")]; fp16 attention_6_q_rotate_half_neg_y_0 = const()[name = string("attention_6_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_6_q_rotate_half_neg = mul(x = attention_6_q_rotate_half_split_1, y = attention_6_q_rotate_half_neg_y_0)[name = string("attention_6_q_rotate_half_neg")]; int32 attention_6_q_rotate_half_concat_axis_0 = const()[name = string("attention_6_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_6_q_rotate_half_concat_interleave_0 = const()[name = string("attention_6_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_6_q_rotate_half_concat = concat(axis = attention_6_q_rotate_half_concat_axis_0, interleave = attention_6_q_rotate_half_concat_interleave_0, values = (attention_6_q_rotate_half_neg, attention_6_q_rotate_half_split_0))[name = string("attention_6_q_rotate_half_concat")]; tensor attention_6_q_rope_rhs_mult = mul(x = attention_6_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_6_q_rope_rhs_mult")]; tensor attention_6_q_rope = add(x = attention_6_q_rope_lhs_mult, y = attention_6_q_rope_rhs_mult)[name = string("attention_6_q_rope")]; tensor attention_6_k_rope_lhs_mult = mul(x = attention_6_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_6_k_rope_lhs_mult")]; int32 attention_6_k_rotate_half_split_num_splits_0 = const()[name = string("attention_6_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_6_k_rotate_half_split_axis_0 = const()[name = string("attention_6_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_6_k_rotate_half_split_0, tensor attention_6_k_rotate_half_split_1 = split(axis = attention_6_k_rotate_half_split_axis_0, num_splits = attention_6_k_rotate_half_split_num_splits_0, x = attention_6_split_qkv_heads_1)[name = string("attention_6_k_rotate_half_split")]; fp16 attention_6_k_rotate_half_neg_y_0 = const()[name = string("attention_6_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_6_k_rotate_half_neg = mul(x = attention_6_k_rotate_half_split_1, y = attention_6_k_rotate_half_neg_y_0)[name = string("attention_6_k_rotate_half_neg")]; int32 attention_6_k_rotate_half_concat_axis_0 = const()[name = string("attention_6_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_6_k_rotate_half_concat_interleave_0 = const()[name = string("attention_6_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_6_k_rotate_half_concat = concat(axis = attention_6_k_rotate_half_concat_axis_0, interleave = attention_6_k_rotate_half_concat_interleave_0, values = (attention_6_k_rotate_half_neg, attention_6_k_rotate_half_split_0))[name = string("attention_6_k_rotate_half_concat")]; tensor attention_6_k_rope_rhs_mult = mul(x = attention_6_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_6_k_rope_rhs_mult")]; tensor attention_6_k_rope = add(x = attention_6_k_rope_lhs_mult, y = attention_6_k_rope_rhs_mult)[name = string("attention_6_k_rope")]; int32 attention_6_q_splits_axis_0 = const()[name = string("attention_6_q_splits_axis_0"), val = int32(1)]; int32 attention_6_q_splits_num_splits_0 = const()[name = string("attention_6_q_splits_num_splits_0"), val = 
int32(2)]; tensor attention_6_q_splits_0, tensor attention_6_q_splits_1 = split(axis = attention_6_q_splits_axis_0, num_splits = attention_6_q_splits_num_splits_0, x = attention_6_q_rope)[name = string("attention_6_q_splits")]; tensor attention_6_update_begin_0_values0_0 = const()[name = string("attention_6_update_begin_0_values0_0"), val = tensor([6])]; tensor attention_6_update_begin_0_values1_0 = const()[name = string("attention_6_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_6_update_begin_0_values3_0 = const()[name = string("attention_6_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_6_update_begin_0_axis_0 = const()[name = string("attention_6_update_begin_0_axis_0"), val = int32(0)]; bool attention_6_update_begin_0_interleave_0 = const()[name = string("attention_6_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_6_update_begin_0 = concat(axis = attention_6_update_begin_0_axis_0, interleave = attention_6_update_begin_0_interleave_0, values = (attention_6_update_begin_0_values0_0, attention_6_update_begin_0_values1_0, query_pos1, attention_6_update_begin_0_values3_0))[name = string("attention_6_update_begin_0")]; tensor attention_6_update_end_0_values0_0 = const()[name = string("attention_6_update_end_0_values0_0"), val = tensor([7])]; tensor attention_6_update_end_0_values1_0 = const()[name = string("attention_6_update_end_0_values1_0"), val = tensor([2])]; tensor attention_6_update_end_0_values3_0 = const()[name = string("attention_6_update_end_0_values3_0"), val = tensor([64])]; int32 attention_6_update_end_0_axis_0 = const()[name = string("attention_6_update_end_0_axis_0"), val = int32(0)]; bool attention_6_update_end_0_interleave_0 = const()[name = string("attention_6_update_end_0_interleave_0"), val = bool(false)]; tensor attention_6_update_end_0 = concat(axis = attention_6_update_end_0_axis_0, interleave = attention_6_update_end_0_interleave_0, values = (attention_6_update_end_0_values0_0, attention_6_update_end_0_values1_0, end_pos_0, attention_6_update_end_0_values3_0))[name = string("attention_6_update_end_0")]; tensor attention_6_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_6_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_6_updated_key_cache_0 = slice_update(begin = attention_6_update_begin_0, end = attention_6_update_end_0, squeeze_mask = attention_6_updated_key_cache_0_squeeze_mask_0, update = attention_6_k_rope, x = coreml_update_state_10)[name = string("attention_6_updated_key_cache_0")]; write_state(data = attention_6_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_12_write_state")]; tensor coreml_update_state_12 = read_state(input = key_cache_state)[name = string("coreml_update_state_12")]; tensor attention_6_key_cache_begin_0 = const()[name = string("attention_6_key_cache_begin_0"), val = tensor([6, 0, 0, 0])]; tensor attention_6_key_cache_end_0 = const()[name = string("attention_6_key_cache_end_0"), val = tensor([7, 2, 512, 64])]; tensor attention_6_key_cache_squeeze_mask_0 = const()[name = string("attention_6_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_6_key_cache = slice_by_index(begin = attention_6_key_cache_begin_0, end = attention_6_key_cache_end_0, squeeze_mask = attention_6_key_cache_squeeze_mask_0, x = coreml_update_state_12)[name = string("attention_6_key_cache")]; int32 attention_6_key_cache_head_axis_0 = const()[name = 
string("attention_6_key_cache_head_axis_0"), val = int32(1)]; int32 attention_6_key_cache_head_num_splits_0 = const()[name = string("attention_6_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_6_key_cache_head_0, tensor attention_6_key_cache_head_1 = split(axis = attention_6_key_cache_head_axis_0, num_splits = attention_6_key_cache_head_num_splits_0, x = attention_6_key_cache)[name = string("attention_6_key_cache_head")]; tensor attention_6_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_6_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_6_updated_value_cache_0 = slice_update(begin = attention_6_update_begin_0, end = attention_6_update_end_0, squeeze_mask = attention_6_updated_value_cache_0_squeeze_mask_0, update = attention_6_split_qkv_heads_2, x = coreml_update_state_11)[name = string("attention_6_updated_value_cache_0")]; write_state(data = attention_6_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_13_write_state")]; tensor coreml_update_state_13 = read_state(input = value_cache_state)[name = string("coreml_update_state_13")]; tensor attention_6_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_6_slice_current_layer_value_cache_begin_0"), val = tensor([6, 0, 0, 0])]; tensor attention_6_slice_current_layer_value_cache_end_0 = const()[name = string("attention_6_slice_current_layer_value_cache_end_0"), val = tensor([7, 2, 512, 64])]; tensor attention_6_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_6_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_6_slice_current_layer_value_cache = slice_by_index(begin = attention_6_slice_current_layer_value_cache_begin_0, end = attention_6_slice_current_layer_value_cache_end_0, squeeze_mask = attention_6_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_13)[name = string("attention_6_slice_current_layer_value_cache")]; int32 attention_6_slice_value_cache_heads_axis_0 = const()[name = string("attention_6_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_6_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_6_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_6_slice_value_cache_heads_0, tensor attention_6_slice_value_cache_heads_1 = split(axis = attention_6_slice_value_cache_heads_axis_0, num_splits = attention_6_slice_value_cache_heads_num_splits_0, x = attention_6_slice_current_layer_value_cache)[name = string("attention_6_slice_value_cache_heads")]; bool attention_6_scores_0_transpose_y_0 = const()[name = string("attention_6_scores_0_transpose_y_0"), val = bool(true)]; bool attention_6_scores_0_transpose_x_0 = const()[name = string("attention_6_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_6_scores_0 = matmul(transpose_x = attention_6_scores_0_transpose_x_0, transpose_y = attention_6_scores_0_transpose_y_0, x = attention_6_key_cache_head_0, y = attention_6_q_splits_0)[name = string("attention_6_scores_0")]; fp16 attention_6_scaled_scores_0_y_0 = const()[name = string("attention_6_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_6_scaled_scores_0 = mul(x = attention_6_scores_0, y = attention_6_scaled_scores_0_y_0)[name = string("attention_6_scaled_scores_0")]; tensor attention_6_masked_scaled_scores_0 = add(x = attention_6_scaled_scores_0, y = transpose_0)[name = 
string("attention_6_masked_scaled_scores_0")]; int32 softmax_12_axis_0 = const()[name = string("softmax_12_axis_0"), val = int32(-2)]; tensor softmax_12 = softmax(axis = softmax_12_axis_0, x = attention_6_masked_scaled_scores_0)[name = string("softmax_12")]; bool attention_6_attention_0_transpose_x_0 = const()[name = string("attention_6_attention_0_transpose_x_0"), val = bool(true)]; bool attention_6_attention_0_transpose_y_0 = const()[name = string("attention_6_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_6_attention_0 = matmul(transpose_x = attention_6_attention_0_transpose_x_0, transpose_y = attention_6_attention_0_transpose_y_0, x = softmax_12, y = attention_6_slice_value_cache_heads_0)[name = string("attention_6_attention_0")]; bool attention_6_scores_1_transpose_y_0 = const()[name = string("attention_6_scores_1_transpose_y_0"), val = bool(true)]; bool attention_6_scores_1_transpose_x_0 = const()[name = string("attention_6_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_6_scores_1 = matmul(transpose_x = attention_6_scores_1_transpose_x_0, transpose_y = attention_6_scores_1_transpose_y_0, x = attention_6_key_cache_head_1, y = attention_6_q_splits_1)[name = string("attention_6_scores_1")]; fp16 attention_6_scaled_scores_1_y_0 = const()[name = string("attention_6_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_6_scaled_scores_1 = mul(x = attention_6_scores_1, y = attention_6_scaled_scores_1_y_0)[name = string("attention_6_scaled_scores_1")]; tensor attention_6_masked_scaled_scores_1 = add(x = attention_6_scaled_scores_1, y = transpose_0)[name = string("attention_6_masked_scaled_scores_1")]; int32 softmax_13_axis_0 = const()[name = string("softmax_13_axis_0"), val = int32(-2)]; tensor softmax_13 = softmax(axis = softmax_13_axis_0, x = attention_6_masked_scaled_scores_1)[name = string("softmax_13")]; bool attention_6_attention_1_transpose_x_0 = const()[name = string("attention_6_attention_1_transpose_x_0"), val = bool(true)]; bool attention_6_attention_1_transpose_y_0 = const()[name = string("attention_6_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_6_attention_1 = matmul(transpose_x = attention_6_attention_1_transpose_x_0, transpose_y = attention_6_attention_1_transpose_y_0, x = softmax_13, y = attention_6_slice_value_cache_heads_1)[name = string("attention_6_attention_1")]; int32 attention_6_concat_attention_all_heads_axis_0 = const()[name = string("attention_6_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_6_concat_attention_all_heads_interleave_0 = const()[name = string("attention_6_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_6_concat_attention_all_heads = concat(axis = attention_6_concat_attention_all_heads_axis_0, interleave = attention_6_concat_attention_all_heads_interleave_0, values = (attention_6_attention_0, attention_6_attention_1))[name = string("attention_6_concat_attention_all_heads")]; tensor attention_6_channels_first_retransposed_perm_0 = const()[name = string("attention_6_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_6_reshaped_shape_0 = const()[name = string("attention_6_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_6_channels_first_retransposed = transpose(perm = attention_6_channels_first_retransposed_perm_0, x = attention_6_concat_attention_all_heads)[name = string("transpose_35")]; tensor attention_6_reshaped = reshape(shape = attention_6_reshaped_shape_0, x = 
attention_6_channels_first_retransposed)[name = string("attention_6_reshaped")]; tensor attention_6_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361667840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362270016))))[name = string("attention_6_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_26 = constexpr_blockwise_shift_scale(data = attention_6_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362298752))))[name = string("constexpr_blockwise_shift_scale_26")]; tensor attention_6_outproj_strides_0 = const()[name = string("attention_6_outproj_strides_0"), val = tensor([1])]; string attention_6_outproj_pad_type_0 = const()[name = string("attention_6_outproj_pad_type_0"), val = string("valid")]; tensor attention_6_outproj_pad_0 = const()[name = string("attention_6_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_6_outproj_dilations_0 = const()[name = string("attention_6_outproj_dilations_0"), val = tensor([1])]; int32 attention_6_outproj_groups_0 = const()[name = string("attention_6_outproj_groups_0"), val = int32(1)]; tensor attention_6_outproj = conv(dilations = attention_6_outproj_dilations_0, groups = attention_6_outproj_groups_0, pad = attention_6_outproj_pad_0, pad_type = attention_6_outproj_pad_type_0, strides = attention_6_outproj_strides_0, weight = constexpr_blockwise_shift_scale_26, x = attention_6_reshaped)[name = string("attention_6_outproj")]; tensor block_6_residual_1 = add(x = block_5_residual_2, y = attention_6_outproj)[name = string("block_6_residual_1")]; tensor block_6_ffn_rmsnorm_abs = abs(x = block_6_residual_1)[name = string("block_6_ffn_rmsnorm_abs")]; tensor block_6_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_6_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_6_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_6_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_6_ffn_rmsnorm_maxval = reduce_max(axes = block_6_ffn_rmsnorm_maxval_axes_0, keep_dims = block_6_ffn_rmsnorm_maxval_keep_dims_0, x = block_6_ffn_rmsnorm_abs)[name = string("block_6_ffn_rmsnorm_maxval")]; fp16 block_6_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_6_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_6_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_6_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_6_ffn_rmsnorm_maxval_clipped = clip(alpha = block_6_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_6_ffn_rmsnorm_maxval_clipped_beta_0, x = block_6_ffn_rmsnorm_maxval)[name = string("block_6_ffn_rmsnorm_maxval_clipped")]; tensor block_6_ffn_rmsnorm_scaled = real_div(x = block_6_residual_1, y = block_6_ffn_rmsnorm_maxval_clipped)[name = string("block_6_ffn_rmsnorm_scaled")]; tensor block_6_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_6_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_6_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_6_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_6_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_6_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_6_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_6_ffn_rmsnorm_scaled)[name = string("block_6_ffn_rmsnorm_squared_sum")]; fp16 block_6_ffn_rmsnorm_rsqrt_epsilon_0 = 
const()[name = string("block_6_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_6_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_6_ffn_rmsnorm_rsqrt_epsilon_0, x = block_6_ffn_rmsnorm_squared_sum)[name = string("block_6_ffn_rmsnorm_rsqrt")]; fp16 block_6_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_6_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_6_ffn_rmsnorm_dim_scaled = mul(x = block_6_ffn_rmsnorm_scaled, y = block_6_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_6_ffn_rmsnorm_dim_scaled")]; tensor block_6_ffn_rmsnorm_normalized = mul(x = block_6_ffn_rmsnorm_dim_scaled, y = block_6_ffn_rmsnorm_rsqrt)[name = string("block_6_ffn_rmsnorm_normalized")]; tensor block_6_ffn_rmsnorm_y_0 = const()[name = string("block_6_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362300608)))]; tensor block_6_ffn_rmsnorm = mul(x = block_6_ffn_rmsnorm_normalized, y = block_6_ffn_rmsnorm_y_0)[name = string("block_6_ffn_rmsnorm")]; tensor block_6_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362302464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365571136))))[name = string("block_6_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_27 = constexpr_blockwise_shift_scale(data = block_6_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365726848))))[name = string("constexpr_blockwise_shift_scale_27")]; tensor block_6_ffn_inproj_strides_0 = const()[name = string("block_6_ffn_inproj_strides_0"), val = tensor([1])]; string block_6_ffn_inproj_pad_type_0 = const()[name = string("block_6_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_6_ffn_inproj_pad_0 = const()[name = string("block_6_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_6_ffn_inproj_dilations_0 = const()[name = string("block_6_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_6_ffn_inproj_groups_0 = const()[name = string("block_6_ffn_inproj_groups_0"), val = int32(1)]; tensor block_6_ffn_inproj = conv(dilations = block_6_ffn_inproj_dilations_0, groups = block_6_ffn_inproj_groups_0, pad = block_6_ffn_inproj_pad_0, pad_type = block_6_ffn_inproj_pad_type_0, strides = block_6_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_27, x = block_6_ffn_rmsnorm)[name = string("block_6_ffn_inproj")]; tensor block_6_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365736640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369005312))))[name = string("block_6_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_28 = constexpr_blockwise_shift_scale(data = block_6_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369161024))))[name = string("constexpr_blockwise_shift_scale_28")]; tensor block_6_ffn_g_strides_0 = const()[name = string("block_6_ffn_g_strides_0"), val = tensor([1])]; string block_6_ffn_g_pad_type_0 = const()[name = string("block_6_ffn_g_pad_type_0"), val = string("valid")]; tensor block_6_ffn_g_pad_0 = const()[name = string("block_6_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_6_ffn_g_dilations_0 = const()[name = string("block_6_ffn_g_dilations_0"), 
val = tensor([1])]; int32 block_6_ffn_g_groups_0 = const()[name = string("block_6_ffn_g_groups_0"), val = int32(1)]; tensor block_6_ffn_g = conv(dilations = block_6_ffn_g_dilations_0, groups = block_6_ffn_g_groups_0, pad = block_6_ffn_g_pad_0, pad_type = block_6_ffn_g_pad_type_0, strides = block_6_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_28, x = block_6_ffn_rmsnorm)[name = string("block_6_ffn_g")]; tensor block_6_ffn_g_activation = silu(x = block_6_ffn_g)[name = string("block_6_ffn_g_activation")]; tensor block_6_ffn_x_gated = mul(x = block_6_ffn_inproj, y = block_6_ffn_g_activation)[name = string("block_6_ffn_x_gated")]; tensor block_6_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369170816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372439488))))[name = string("block_6_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_29 = constexpr_blockwise_shift_scale(data = block_6_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372468224))))[name = string("constexpr_blockwise_shift_scale_29")]; tensor block_6_ffn_outproj_strides_0 = const()[name = string("block_6_ffn_outproj_strides_0"), val = tensor([1])]; string block_6_ffn_outproj_pad_type_0 = const()[name = string("block_6_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_6_ffn_outproj_pad_0 = const()[name = string("block_6_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_6_ffn_outproj_dilations_0 = const()[name = string("block_6_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_6_ffn_outproj_groups_0 = const()[name = string("block_6_ffn_outproj_groups_0"), val = int32(1)]; tensor block_6_ffn_outproj = conv(dilations = block_6_ffn_outproj_dilations_0, groups = block_6_ffn_outproj_groups_0, pad = block_6_ffn_outproj_pad_0, pad_type = block_6_ffn_outproj_pad_type_0, strides = block_6_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_29, x = block_6_ffn_x_gated)[name = string("block_6_ffn_outproj")]; tensor block_6_residual_2 = add(x = block_6_ffn_outproj, y = block_6_residual_1)[name = string("block_6_residual_2")]; tensor block_7_attention_rmsnorm_abs = abs(x = block_6_residual_2)[name = string("block_7_attention_rmsnorm_abs")]; tensor block_7_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_7_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_7_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_7_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_7_attention_rmsnorm_maxval = reduce_max(axes = block_7_attention_rmsnorm_maxval_axes_0, keep_dims = block_7_attention_rmsnorm_maxval_keep_dims_0, x = block_7_attention_rmsnorm_abs)[name = string("block_7_attention_rmsnorm_maxval")]; fp16 block_7_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_7_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_7_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_7_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_7_attention_rmsnorm_maxval_clipped = clip(alpha = block_7_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_7_attention_rmsnorm_maxval_clipped_beta_0, x = block_7_attention_rmsnorm_maxval)[name = string("block_7_attention_rmsnorm_maxval_clipped")]; tensor block_7_attention_rmsnorm_scaled 
= real_div(x = block_6_residual_2, y = block_7_attention_rmsnorm_maxval_clipped)[name = string("block_7_attention_rmsnorm_scaled")]; tensor block_7_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_7_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_7_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_7_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_7_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_7_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_7_attention_rmsnorm_squared_sum_keep_dims_0, x = block_7_attention_rmsnorm_scaled)[name = string("block_7_attention_rmsnorm_squared_sum")]; fp16 block_7_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_7_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_7_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_7_attention_rmsnorm_rsqrt_epsilon_0, x = block_7_attention_rmsnorm_squared_sum)[name = string("block_7_attention_rmsnorm_rsqrt")]; fp16 block_7_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_7_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_7_attention_rmsnorm_dim_scaled = mul(x = block_7_attention_rmsnorm_scaled, y = block_7_attention_rmsnorm_dim_scaled_y_0)[name = string("block_7_attention_rmsnorm_dim_scaled")]; tensor block_7_attention_rmsnorm_normalized = mul(x = block_7_attention_rmsnorm_dim_scaled, y = block_7_attention_rmsnorm_rsqrt)[name = string("block_7_attention_rmsnorm_normalized")]; tensor block_7_attention_rmsnorm_y_0 = const()[name = string("block_7_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372470080)))]; tensor block_7_attention_rmsnorm = mul(x = block_7_attention_rmsnorm_normalized, y = block_7_attention_rmsnorm_y_0)[name = string("block_7_attention_rmsnorm")]; tensor attention_7_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372471936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373246144))))[name = string("attention_7_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_30 = constexpr_blockwise_shift_scale(data = attention_7_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373283072))))[name = string("constexpr_blockwise_shift_scale_30")]; tensor attention_7_qkvproj_bias_0 = const()[name = string("attention_7_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373285440)))]; tensor attention_7_qkvproj_strides_0 = const()[name = string("attention_7_qkvproj_strides_0"), val = tensor([1])]; string attention_7_qkvproj_pad_type_0 = const()[name = string("attention_7_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_7_qkvproj_pad_0 = const()[name = string("attention_7_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_7_qkvproj_dilations_0 = const()[name = string("attention_7_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_7_qkvproj_groups_0 = const()[name = string("attention_7_qkvproj_groups_0"), val = int32(1)]; tensor attention_7_qkvproj = conv(bias = attention_7_qkvproj_bias_0, dilations = attention_7_qkvproj_dilations_0, groups = attention_7_qkvproj_groups_0, pad = attention_7_qkvproj_pad_0, pad_type = attention_7_qkvproj_pad_type_0, strides = 
attention_7_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_30, x = block_7_attention_rmsnorm)[name = string("attention_7_qkvproj")]; tensor attention_7_head_reshape_shape_0 = const()[name = string("attention_7_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_7_head_reshape = reshape(shape = attention_7_head_reshape_shape_0, x = attention_7_qkvproj)[name = string("attention_7_head_reshape")]; tensor attention_7_head_transpose_perm_0 = const()[name = string("attention_7_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_7_split_qkv_heads_axis_0 = const()[name = string("attention_7_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_7_split_qkv_heads_split_sizes_0 = const()[name = string("attention_7_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_7_head_transpose = transpose(perm = attention_7_head_transpose_perm_0, x = attention_7_head_reshape)[name = string("transpose_34")]; tensor attention_7_split_qkv_heads_0, tensor attention_7_split_qkv_heads_1, tensor attention_7_split_qkv_heads_2 = split(axis = attention_7_split_qkv_heads_axis_0, split_sizes = attention_7_split_qkv_heads_split_sizes_0, x = attention_7_head_transpose)[name = string("attention_7_split_qkv_heads")]; tensor attention_7_q_rope_lhs_mult = mul(x = attention_7_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_7_q_rope_lhs_mult")]; int32 attention_7_q_rotate_half_split_num_splits_0 = const()[name = string("attention_7_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_7_q_rotate_half_split_axis_0 = const()[name = string("attention_7_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_7_q_rotate_half_split_0, tensor attention_7_q_rotate_half_split_1 = split(axis = attention_7_q_rotate_half_split_axis_0, num_splits = attention_7_q_rotate_half_split_num_splits_0, x = attention_7_split_qkv_heads_0)[name = string("attention_7_q_rotate_half_split")]; fp16 attention_7_q_rotate_half_neg_y_0 = const()[name = string("attention_7_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_7_q_rotate_half_neg = mul(x = attention_7_q_rotate_half_split_1, y = attention_7_q_rotate_half_neg_y_0)[name = string("attention_7_q_rotate_half_neg")]; int32 attention_7_q_rotate_half_concat_axis_0 = const()[name = string("attention_7_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_7_q_rotate_half_concat_interleave_0 = const()[name = string("attention_7_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_7_q_rotate_half_concat = concat(axis = attention_7_q_rotate_half_concat_axis_0, interleave = attention_7_q_rotate_half_concat_interleave_0, values = (attention_7_q_rotate_half_neg, attention_7_q_rotate_half_split_0))[name = string("attention_7_q_rotate_half_concat")]; tensor attention_7_q_rope_rhs_mult = mul(x = attention_7_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_7_q_rope_rhs_mult")]; tensor attention_7_q_rope = add(x = attention_7_q_rope_lhs_mult, y = attention_7_q_rope_rhs_mult)[name = string("attention_7_q_rope")]; tensor attention_7_k_rope_lhs_mult = mul(x = attention_7_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_7_k_rope_lhs_mult")]; int32 attention_7_k_rotate_half_split_num_splits_0 = const()[name = string("attention_7_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_7_k_rotate_half_split_axis_0 = const()[name = string("attention_7_k_rotate_half_split_axis_0"), val = int32(3)]; tensor 
attention_7_k_rotate_half_split_0, tensor attention_7_k_rotate_half_split_1 = split(axis = attention_7_k_rotate_half_split_axis_0, num_splits = attention_7_k_rotate_half_split_num_splits_0, x = attention_7_split_qkv_heads_1)[name = string("attention_7_k_rotate_half_split")]; fp16 attention_7_k_rotate_half_neg_y_0 = const()[name = string("attention_7_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_7_k_rotate_half_neg = mul(x = attention_7_k_rotate_half_split_1, y = attention_7_k_rotate_half_neg_y_0)[name = string("attention_7_k_rotate_half_neg")]; int32 attention_7_k_rotate_half_concat_axis_0 = const()[name = string("attention_7_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_7_k_rotate_half_concat_interleave_0 = const()[name = string("attention_7_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_7_k_rotate_half_concat = concat(axis = attention_7_k_rotate_half_concat_axis_0, interleave = attention_7_k_rotate_half_concat_interleave_0, values = (attention_7_k_rotate_half_neg, attention_7_k_rotate_half_split_0))[name = string("attention_7_k_rotate_half_concat")]; tensor attention_7_k_rope_rhs_mult = mul(x = attention_7_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_7_k_rope_rhs_mult")]; tensor attention_7_k_rope = add(x = attention_7_k_rope_lhs_mult, y = attention_7_k_rope_rhs_mult)[name = string("attention_7_k_rope")]; int32 attention_7_q_splits_axis_0 = const()[name = string("attention_7_q_splits_axis_0"), val = int32(1)]; int32 attention_7_q_splits_num_splits_0 = const()[name = string("attention_7_q_splits_num_splits_0"), val = int32(2)]; tensor attention_7_q_splits_0, tensor attention_7_q_splits_1 = split(axis = attention_7_q_splits_axis_0, num_splits = attention_7_q_splits_num_splits_0, x = attention_7_q_rope)[name = string("attention_7_q_splits")]; tensor attention_7_update_begin_0_values0_0 = const()[name = string("attention_7_update_begin_0_values0_0"), val = tensor([7])]; tensor attention_7_update_begin_0_values1_0 = const()[name = string("attention_7_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_7_update_begin_0_values3_0 = const()[name = string("attention_7_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_7_update_begin_0_axis_0 = const()[name = string("attention_7_update_begin_0_axis_0"), val = int32(0)]; bool attention_7_update_begin_0_interleave_0 = const()[name = string("attention_7_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_7_update_begin_0 = concat(axis = attention_7_update_begin_0_axis_0, interleave = attention_7_update_begin_0_interleave_0, values = (attention_7_update_begin_0_values0_0, attention_7_update_begin_0_values1_0, query_pos1, attention_7_update_begin_0_values3_0))[name = string("attention_7_update_begin_0")]; tensor attention_7_update_end_0_values0_0 = const()[name = string("attention_7_update_end_0_values0_0"), val = tensor([8])]; tensor attention_7_update_end_0_values1_0 = const()[name = string("attention_7_update_end_0_values1_0"), val = tensor([2])]; tensor attention_7_update_end_0_values3_0 = const()[name = string("attention_7_update_end_0_values3_0"), val = tensor([64])]; int32 attention_7_update_end_0_axis_0 = const()[name = string("attention_7_update_end_0_axis_0"), val = int32(0)]; bool attention_7_update_end_0_interleave_0 = const()[name = string("attention_7_update_end_0_interleave_0"), val = bool(false)]; tensor attention_7_update_end_0 = concat(axis = attention_7_update_end_0_axis_0, interleave = 
attention_7_update_end_0_interleave_0, values = (attention_7_update_end_0_values0_0, attention_7_update_end_0_values1_0, end_pos_0, attention_7_update_end_0_values3_0))[name = string("attention_7_update_end_0")]; tensor attention_7_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_7_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_7_updated_key_cache_0 = slice_update(begin = attention_7_update_begin_0, end = attention_7_update_end_0, squeeze_mask = attention_7_updated_key_cache_0_squeeze_mask_0, update = attention_7_k_rope, x = coreml_update_state_12)[name = string("attention_7_updated_key_cache_0")]; write_state(data = attention_7_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_14_write_state")]; tensor coreml_update_state_14 = read_state(input = key_cache_state)[name = string("coreml_update_state_14")]; tensor attention_7_key_cache_begin_0 = const()[name = string("attention_7_key_cache_begin_0"), val = tensor([7, 0, 0, 0])]; tensor attention_7_key_cache_end_0 = const()[name = string("attention_7_key_cache_end_0"), val = tensor([8, 2, 512, 64])]; tensor attention_7_key_cache_squeeze_mask_0 = const()[name = string("attention_7_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_7_key_cache = slice_by_index(begin = attention_7_key_cache_begin_0, end = attention_7_key_cache_end_0, squeeze_mask = attention_7_key_cache_squeeze_mask_0, x = coreml_update_state_14)[name = string("attention_7_key_cache")]; int32 attention_7_key_cache_head_axis_0 = const()[name = string("attention_7_key_cache_head_axis_0"), val = int32(1)]; int32 attention_7_key_cache_head_num_splits_0 = const()[name = string("attention_7_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_7_key_cache_head_0, tensor attention_7_key_cache_head_1 = split(axis = attention_7_key_cache_head_axis_0, num_splits = attention_7_key_cache_head_num_splits_0, x = attention_7_key_cache)[name = string("attention_7_key_cache_head")]; tensor attention_7_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_7_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_7_updated_value_cache_0 = slice_update(begin = attention_7_update_begin_0, end = attention_7_update_end_0, squeeze_mask = attention_7_updated_value_cache_0_squeeze_mask_0, update = attention_7_split_qkv_heads_2, x = coreml_update_state_13)[name = string("attention_7_updated_value_cache_0")]; write_state(data = attention_7_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_15_write_state")]; tensor coreml_update_state_15 = read_state(input = value_cache_state)[name = string("coreml_update_state_15")]; tensor attention_7_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_7_slice_current_layer_value_cache_begin_0"), val = tensor([7, 0, 0, 0])]; tensor attention_7_slice_current_layer_value_cache_end_0 = const()[name = string("attention_7_slice_current_layer_value_cache_end_0"), val = tensor([8, 2, 512, 64])]; tensor attention_7_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_7_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_7_slice_current_layer_value_cache = slice_by_index(begin = attention_7_slice_current_layer_value_cache_begin_0, end = attention_7_slice_current_layer_value_cache_end_0, squeeze_mask = 
attention_7_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_15)[name = string("attention_7_slice_current_layer_value_cache")]; int32 attention_7_slice_value_cache_heads_axis_0 = const()[name = string("attention_7_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_7_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_7_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_7_slice_value_cache_heads_0, tensor attention_7_slice_value_cache_heads_1 = split(axis = attention_7_slice_value_cache_heads_axis_0, num_splits = attention_7_slice_value_cache_heads_num_splits_0, x = attention_7_slice_current_layer_value_cache)[name = string("attention_7_slice_value_cache_heads")]; bool attention_7_scores_0_transpose_y_0 = const()[name = string("attention_7_scores_0_transpose_y_0"), val = bool(true)]; bool attention_7_scores_0_transpose_x_0 = const()[name = string("attention_7_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_7_scores_0 = matmul(transpose_x = attention_7_scores_0_transpose_x_0, transpose_y = attention_7_scores_0_transpose_y_0, x = attention_7_key_cache_head_0, y = attention_7_q_splits_0)[name = string("attention_7_scores_0")]; fp16 attention_7_scaled_scores_0_y_0 = const()[name = string("attention_7_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_7_scaled_scores_0 = mul(x = attention_7_scores_0, y = attention_7_scaled_scores_0_y_0)[name = string("attention_7_scaled_scores_0")]; tensor attention_7_masked_scaled_scores_0 = add(x = attention_7_scaled_scores_0, y = transpose_0)[name = string("attention_7_masked_scaled_scores_0")]; int32 softmax_14_axis_0 = const()[name = string("softmax_14_axis_0"), val = int32(-2)]; tensor softmax_14 = softmax(axis = softmax_14_axis_0, x = attention_7_masked_scaled_scores_0)[name = string("softmax_14")]; bool attention_7_attention_0_transpose_x_0 = const()[name = string("attention_7_attention_0_transpose_x_0"), val = bool(true)]; bool attention_7_attention_0_transpose_y_0 = const()[name = string("attention_7_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_7_attention_0 = matmul(transpose_x = attention_7_attention_0_transpose_x_0, transpose_y = attention_7_attention_0_transpose_y_0, x = softmax_14, y = attention_7_slice_value_cache_heads_0)[name = string("attention_7_attention_0")]; bool attention_7_scores_1_transpose_y_0 = const()[name = string("attention_7_scores_1_transpose_y_0"), val = bool(true)]; bool attention_7_scores_1_transpose_x_0 = const()[name = string("attention_7_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_7_scores_1 = matmul(transpose_x = attention_7_scores_1_transpose_x_0, transpose_y = attention_7_scores_1_transpose_y_0, x = attention_7_key_cache_head_1, y = attention_7_q_splits_1)[name = string("attention_7_scores_1")]; fp16 attention_7_scaled_scores_1_y_0 = const()[name = string("attention_7_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_7_scaled_scores_1 = mul(x = attention_7_scores_1, y = attention_7_scaled_scores_1_y_0)[name = string("attention_7_scaled_scores_1")]; tensor attention_7_masked_scaled_scores_1 = add(x = attention_7_scaled_scores_1, y = transpose_0)[name = string("attention_7_masked_scaled_scores_1")]; int32 softmax_15_axis_0 = const()[name = string("softmax_15_axis_0"), val = int32(-2)]; tensor softmax_15 = softmax(axis = softmax_15_axis_0, x = attention_7_masked_scaled_scores_1)[name = string("softmax_15")]; bool attention_7_attention_1_transpose_x_0 = const()[name = 
string("attention_7_attention_1_transpose_x_0"), val = bool(true)]; bool attention_7_attention_1_transpose_y_0 = const()[name = string("attention_7_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_7_attention_1 = matmul(transpose_x = attention_7_attention_1_transpose_x_0, transpose_y = attention_7_attention_1_transpose_y_0, x = softmax_15, y = attention_7_slice_value_cache_heads_1)[name = string("attention_7_attention_1")]; int32 attention_7_concat_attention_all_heads_axis_0 = const()[name = string("attention_7_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_7_concat_attention_all_heads_interleave_0 = const()[name = string("attention_7_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_7_concat_attention_all_heads = concat(axis = attention_7_concat_attention_all_heads_axis_0, interleave = attention_7_concat_attention_all_heads_interleave_0, values = (attention_7_attention_0, attention_7_attention_1))[name = string("attention_7_concat_attention_all_heads")]; tensor attention_7_channels_first_retransposed_perm_0 = const()[name = string("attention_7_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_7_reshaped_shape_0 = const()[name = string("attention_7_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_7_channels_first_retransposed = transpose(perm = attention_7_channels_first_retransposed_perm_0, x = attention_7_concat_attention_all_heads)[name = string("transpose_33")]; tensor attention_7_reshaped = reshape(shape = attention_7_reshaped_shape_0, x = attention_7_channels_first_retransposed)[name = string("attention_7_reshaped")]; tensor attention_7_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373287808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373889984))))[name = string("attention_7_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_31 = constexpr_blockwise_shift_scale(data = attention_7_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373918720))))[name = string("constexpr_blockwise_shift_scale_31")]; tensor attention_7_outproj_strides_0 = const()[name = string("attention_7_outproj_strides_0"), val = tensor([1])]; string attention_7_outproj_pad_type_0 = const()[name = string("attention_7_outproj_pad_type_0"), val = string("valid")]; tensor attention_7_outproj_pad_0 = const()[name = string("attention_7_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_7_outproj_dilations_0 = const()[name = string("attention_7_outproj_dilations_0"), val = tensor([1])]; int32 attention_7_outproj_groups_0 = const()[name = string("attention_7_outproj_groups_0"), val = int32(1)]; tensor attention_7_outproj = conv(dilations = attention_7_outproj_dilations_0, groups = attention_7_outproj_groups_0, pad = attention_7_outproj_pad_0, pad_type = attention_7_outproj_pad_type_0, strides = attention_7_outproj_strides_0, weight = constexpr_blockwise_shift_scale_31, x = attention_7_reshaped)[name = string("attention_7_outproj")]; tensor block_7_residual_1 = add(x = block_6_residual_2, y = attention_7_outproj)[name = string("block_7_residual_1")]; tensor block_7_ffn_rmsnorm_abs = abs(x = block_7_residual_1)[name = string("block_7_ffn_rmsnorm_abs")]; tensor block_7_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_7_ffn_rmsnorm_maxval_axes_0"), val = 
tensor([1])]; bool block_7_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_7_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_7_ffn_rmsnorm_maxval = reduce_max(axes = block_7_ffn_rmsnorm_maxval_axes_0, keep_dims = block_7_ffn_rmsnorm_maxval_keep_dims_0, x = block_7_ffn_rmsnorm_abs)[name = string("block_7_ffn_rmsnorm_maxval")]; fp16 block_7_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_7_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_7_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_7_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_7_ffn_rmsnorm_maxval_clipped = clip(alpha = block_7_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_7_ffn_rmsnorm_maxval_clipped_beta_0, x = block_7_ffn_rmsnorm_maxval)[name = string("block_7_ffn_rmsnorm_maxval_clipped")]; tensor block_7_ffn_rmsnorm_scaled = real_div(x = block_7_residual_1, y = block_7_ffn_rmsnorm_maxval_clipped)[name = string("block_7_ffn_rmsnorm_scaled")]; tensor block_7_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_7_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_7_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_7_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_7_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_7_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_7_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_7_ffn_rmsnorm_scaled)[name = string("block_7_ffn_rmsnorm_squared_sum")]; fp16 block_7_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_7_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_7_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_7_ffn_rmsnorm_rsqrt_epsilon_0, x = block_7_ffn_rmsnorm_squared_sum)[name = string("block_7_ffn_rmsnorm_rsqrt")]; fp16 block_7_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_7_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_7_ffn_rmsnorm_dim_scaled = mul(x = block_7_ffn_rmsnorm_scaled, y = block_7_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_7_ffn_rmsnorm_dim_scaled")]; tensor block_7_ffn_rmsnorm_normalized = mul(x = block_7_ffn_rmsnorm_dim_scaled, y = block_7_ffn_rmsnorm_rsqrt)[name = string("block_7_ffn_rmsnorm_normalized")]; tensor block_7_ffn_rmsnorm_y_0 = const()[name = string("block_7_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373920576)))]; tensor block_7_ffn_rmsnorm = mul(x = block_7_ffn_rmsnorm_normalized, y = block_7_ffn_rmsnorm_y_0)[name = string("block_7_ffn_rmsnorm")]; tensor block_7_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373922432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377191104))))[name = string("block_7_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_32 = constexpr_blockwise_shift_scale(data = block_7_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377346816))))[name = string("constexpr_blockwise_shift_scale_32")]; tensor block_7_ffn_inproj_strides_0 = const()[name = string("block_7_ffn_inproj_strides_0"), val = tensor([1])]; string block_7_ffn_inproj_pad_type_0 = const()[name = string("block_7_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_7_ffn_inproj_pad_0 = const()[name = 
string("block_7_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_7_ffn_inproj_dilations_0 = const()[name = string("block_7_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_7_ffn_inproj_groups_0 = const()[name = string("block_7_ffn_inproj_groups_0"), val = int32(1)]; tensor block_7_ffn_inproj = conv(dilations = block_7_ffn_inproj_dilations_0, groups = block_7_ffn_inproj_groups_0, pad = block_7_ffn_inproj_pad_0, pad_type = block_7_ffn_inproj_pad_type_0, strides = block_7_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_32, x = block_7_ffn_rmsnorm)[name = string("block_7_ffn_inproj")]; tensor block_7_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377356608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380625280))))[name = string("block_7_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_33 = constexpr_blockwise_shift_scale(data = block_7_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380780992))))[name = string("constexpr_blockwise_shift_scale_33")]; tensor block_7_ffn_g_strides_0 = const()[name = string("block_7_ffn_g_strides_0"), val = tensor([1])]; string block_7_ffn_g_pad_type_0 = const()[name = string("block_7_ffn_g_pad_type_0"), val = string("valid")]; tensor block_7_ffn_g_pad_0 = const()[name = string("block_7_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_7_ffn_g_dilations_0 = const()[name = string("block_7_ffn_g_dilations_0"), val = tensor([1])]; int32 block_7_ffn_g_groups_0 = const()[name = string("block_7_ffn_g_groups_0"), val = int32(1)]; tensor block_7_ffn_g = conv(dilations = block_7_ffn_g_dilations_0, groups = block_7_ffn_g_groups_0, pad = block_7_ffn_g_pad_0, pad_type = block_7_ffn_g_pad_type_0, strides = block_7_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_33, x = block_7_ffn_rmsnorm)[name = string("block_7_ffn_g")]; tensor block_7_ffn_g_activation = silu(x = block_7_ffn_g)[name = string("block_7_ffn_g_activation")]; tensor block_7_ffn_x_gated = mul(x = block_7_ffn_inproj, y = block_7_ffn_g_activation)[name = string("block_7_ffn_x_gated")]; tensor block_7_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380790784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384059456))))[name = string("block_7_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_34 = constexpr_blockwise_shift_scale(data = block_7_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384088192))))[name = string("constexpr_blockwise_shift_scale_34")]; tensor block_7_ffn_outproj_strides_0 = const()[name = string("block_7_ffn_outproj_strides_0"), val = tensor([1])]; string block_7_ffn_outproj_pad_type_0 = const()[name = string("block_7_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_7_ffn_outproj_pad_0 = const()[name = string("block_7_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_7_ffn_outproj_dilations_0 = const()[name = string("block_7_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_7_ffn_outproj_groups_0 = const()[name = string("block_7_ffn_outproj_groups_0"), val = int32(1)]; tensor block_7_ffn_outproj = conv(dilations = block_7_ffn_outproj_dilations_0, groups 
= block_7_ffn_outproj_groups_0, pad = block_7_ffn_outproj_pad_0, pad_type = block_7_ffn_outproj_pad_type_0, strides = block_7_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_34, x = block_7_ffn_x_gated)[name = string("block_7_ffn_outproj")]; tensor block_7_residual_2 = add(x = block_7_ffn_outproj, y = block_7_residual_1)[name = string("block_7_residual_2")]; tensor block_8_attention_rmsnorm_abs = abs(x = block_7_residual_2)[name = string("block_8_attention_rmsnorm_abs")]; tensor block_8_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_8_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_8_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_8_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_8_attention_rmsnorm_maxval = reduce_max(axes = block_8_attention_rmsnorm_maxval_axes_0, keep_dims = block_8_attention_rmsnorm_maxval_keep_dims_0, x = block_8_attention_rmsnorm_abs)[name = string("block_8_attention_rmsnorm_maxval")]; fp16 block_8_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_8_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_8_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_8_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_8_attention_rmsnorm_maxval_clipped = clip(alpha = block_8_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_8_attention_rmsnorm_maxval_clipped_beta_0, x = block_8_attention_rmsnorm_maxval)[name = string("block_8_attention_rmsnorm_maxval_clipped")]; tensor block_8_attention_rmsnorm_scaled = real_div(x = block_7_residual_2, y = block_8_attention_rmsnorm_maxval_clipped)[name = string("block_8_attention_rmsnorm_scaled")]; tensor block_8_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_8_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_8_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_8_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_8_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_8_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_8_attention_rmsnorm_squared_sum_keep_dims_0, x = block_8_attention_rmsnorm_scaled)[name = string("block_8_attention_rmsnorm_squared_sum")]; fp16 block_8_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_8_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_8_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_8_attention_rmsnorm_rsqrt_epsilon_0, x = block_8_attention_rmsnorm_squared_sum)[name = string("block_8_attention_rmsnorm_rsqrt")]; fp16 block_8_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_8_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_8_attention_rmsnorm_dim_scaled = mul(x = block_8_attention_rmsnorm_scaled, y = block_8_attention_rmsnorm_dim_scaled_y_0)[name = string("block_8_attention_rmsnorm_dim_scaled")]; tensor block_8_attention_rmsnorm_normalized = mul(x = block_8_attention_rmsnorm_dim_scaled, y = block_8_attention_rmsnorm_rsqrt)[name = string("block_8_attention_rmsnorm_normalized")]; tensor block_8_attention_rmsnorm_y_0 = const()[name = string("block_8_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384090048)))]; tensor block_8_attention_rmsnorm = mul(x = block_8_attention_rmsnorm_normalized, y = block_8_attention_rmsnorm_y_0)[name = string("block_8_attention_rmsnorm")]; tensor 
attention_8_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384091904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384866112))))[name = string("attention_8_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_35 = constexpr_blockwise_shift_scale(data = attention_8_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384903040))))[name = string("constexpr_blockwise_shift_scale_35")]; tensor attention_8_qkvproj_bias_0 = const()[name = string("attention_8_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384905408)))]; tensor attention_8_qkvproj_strides_0 = const()[name = string("attention_8_qkvproj_strides_0"), val = tensor([1])]; string attention_8_qkvproj_pad_type_0 = const()[name = string("attention_8_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_8_qkvproj_pad_0 = const()[name = string("attention_8_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_8_qkvproj_dilations_0 = const()[name = string("attention_8_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_8_qkvproj_groups_0 = const()[name = string("attention_8_qkvproj_groups_0"), val = int32(1)]; tensor attention_8_qkvproj = conv(bias = attention_8_qkvproj_bias_0, dilations = attention_8_qkvproj_dilations_0, groups = attention_8_qkvproj_groups_0, pad = attention_8_qkvproj_pad_0, pad_type = attention_8_qkvproj_pad_type_0, strides = attention_8_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_35, x = block_8_attention_rmsnorm)[name = string("attention_8_qkvproj")]; tensor attention_8_head_reshape_shape_0 = const()[name = string("attention_8_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_8_head_reshape = reshape(shape = attention_8_head_reshape_shape_0, x = attention_8_qkvproj)[name = string("attention_8_head_reshape")]; tensor attention_8_head_transpose_perm_0 = const()[name = string("attention_8_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_8_split_qkv_heads_axis_0 = const()[name = string("attention_8_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_8_split_qkv_heads_split_sizes_0 = const()[name = string("attention_8_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_8_head_transpose = transpose(perm = attention_8_head_transpose_perm_0, x = attention_8_head_reshape)[name = string("transpose_32")]; tensor attention_8_split_qkv_heads_0, tensor attention_8_split_qkv_heads_1, tensor attention_8_split_qkv_heads_2 = split(axis = attention_8_split_qkv_heads_axis_0, split_sizes = attention_8_split_qkv_heads_split_sizes_0, x = attention_8_head_transpose)[name = string("attention_8_split_qkv_heads")]; tensor attention_8_q_rope_lhs_mult = mul(x = attention_8_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_8_q_rope_lhs_mult")]; int32 attention_8_q_rotate_half_split_num_splits_0 = const()[name = string("attention_8_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_8_q_rotate_half_split_axis_0 = const()[name = string("attention_8_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_8_q_rotate_half_split_0, tensor attention_8_q_rotate_half_split_1 = split(axis = attention_8_q_rotate_half_split_axis_0, num_splits = attention_8_q_rotate_half_split_num_splits_0, x = attention_8_split_qkv_heads_0)[name = 
string("attention_8_q_rotate_half_split")]; fp16 attention_8_q_rotate_half_neg_y_0 = const()[name = string("attention_8_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_8_q_rotate_half_neg = mul(x = attention_8_q_rotate_half_split_1, y = attention_8_q_rotate_half_neg_y_0)[name = string("attention_8_q_rotate_half_neg")]; int32 attention_8_q_rotate_half_concat_axis_0 = const()[name = string("attention_8_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_8_q_rotate_half_concat_interleave_0 = const()[name = string("attention_8_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_8_q_rotate_half_concat = concat(axis = attention_8_q_rotate_half_concat_axis_0, interleave = attention_8_q_rotate_half_concat_interleave_0, values = (attention_8_q_rotate_half_neg, attention_8_q_rotate_half_split_0))[name = string("attention_8_q_rotate_half_concat")]; tensor attention_8_q_rope_rhs_mult = mul(x = attention_8_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_8_q_rope_rhs_mult")]; tensor attention_8_q_rope = add(x = attention_8_q_rope_lhs_mult, y = attention_8_q_rope_rhs_mult)[name = string("attention_8_q_rope")]; tensor attention_8_k_rope_lhs_mult = mul(x = attention_8_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_8_k_rope_lhs_mult")]; int32 attention_8_k_rotate_half_split_num_splits_0 = const()[name = string("attention_8_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_8_k_rotate_half_split_axis_0 = const()[name = string("attention_8_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_8_k_rotate_half_split_0, tensor attention_8_k_rotate_half_split_1 = split(axis = attention_8_k_rotate_half_split_axis_0, num_splits = attention_8_k_rotate_half_split_num_splits_0, x = attention_8_split_qkv_heads_1)[name = string("attention_8_k_rotate_half_split")]; fp16 attention_8_k_rotate_half_neg_y_0 = const()[name = string("attention_8_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_8_k_rotate_half_neg = mul(x = attention_8_k_rotate_half_split_1, y = attention_8_k_rotate_half_neg_y_0)[name = string("attention_8_k_rotate_half_neg")]; int32 attention_8_k_rotate_half_concat_axis_0 = const()[name = string("attention_8_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_8_k_rotate_half_concat_interleave_0 = const()[name = string("attention_8_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_8_k_rotate_half_concat = concat(axis = attention_8_k_rotate_half_concat_axis_0, interleave = attention_8_k_rotate_half_concat_interleave_0, values = (attention_8_k_rotate_half_neg, attention_8_k_rotate_half_split_0))[name = string("attention_8_k_rotate_half_concat")]; tensor attention_8_k_rope_rhs_mult = mul(x = attention_8_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_8_k_rope_rhs_mult")]; tensor attention_8_k_rope = add(x = attention_8_k_rope_lhs_mult, y = attention_8_k_rope_rhs_mult)[name = string("attention_8_k_rope")]; int32 attention_8_q_splits_axis_0 = const()[name = string("attention_8_q_splits_axis_0"), val = int32(1)]; int32 attention_8_q_splits_num_splits_0 = const()[name = string("attention_8_q_splits_num_splits_0"), val = int32(2)]; tensor attention_8_q_splits_0, tensor attention_8_q_splits_1 = split(axis = attention_8_q_splits_axis_0, num_splits = attention_8_q_splits_num_splits_0, x = attention_8_q_rope)[name = string("attention_8_q_splits")]; tensor attention_8_update_begin_0_values0_0 = const()[name = 
string("attention_8_update_begin_0_values0_0"), val = tensor([8])]; tensor attention_8_update_begin_0_values1_0 = const()[name = string("attention_8_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_8_update_begin_0_values3_0 = const()[name = string("attention_8_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_8_update_begin_0_axis_0 = const()[name = string("attention_8_update_begin_0_axis_0"), val = int32(0)]; bool attention_8_update_begin_0_interleave_0 = const()[name = string("attention_8_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_8_update_begin_0 = concat(axis = attention_8_update_begin_0_axis_0, interleave = attention_8_update_begin_0_interleave_0, values = (attention_8_update_begin_0_values0_0, attention_8_update_begin_0_values1_0, query_pos1, attention_8_update_begin_0_values3_0))[name = string("attention_8_update_begin_0")]; tensor attention_8_update_end_0_values0_0 = const()[name = string("attention_8_update_end_0_values0_0"), val = tensor([9])]; tensor attention_8_update_end_0_values1_0 = const()[name = string("attention_8_update_end_0_values1_0"), val = tensor([2])]; tensor attention_8_update_end_0_values3_0 = const()[name = string("attention_8_update_end_0_values3_0"), val = tensor([64])]; int32 attention_8_update_end_0_axis_0 = const()[name = string("attention_8_update_end_0_axis_0"), val = int32(0)]; bool attention_8_update_end_0_interleave_0 = const()[name = string("attention_8_update_end_0_interleave_0"), val = bool(false)]; tensor attention_8_update_end_0 = concat(axis = attention_8_update_end_0_axis_0, interleave = attention_8_update_end_0_interleave_0, values = (attention_8_update_end_0_values0_0, attention_8_update_end_0_values1_0, end_pos_0, attention_8_update_end_0_values3_0))[name = string("attention_8_update_end_0")]; tensor attention_8_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_8_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_8_updated_key_cache_0 = slice_update(begin = attention_8_update_begin_0, end = attention_8_update_end_0, squeeze_mask = attention_8_updated_key_cache_0_squeeze_mask_0, update = attention_8_k_rope, x = coreml_update_state_14)[name = string("attention_8_updated_key_cache_0")]; write_state(data = attention_8_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_16_write_state")]; tensor coreml_update_state_16 = read_state(input = key_cache_state)[name = string("coreml_update_state_16")]; tensor attention_8_key_cache_begin_0 = const()[name = string("attention_8_key_cache_begin_0"), val = tensor([8, 0, 0, 0])]; tensor attention_8_key_cache_end_0 = const()[name = string("attention_8_key_cache_end_0"), val = tensor([9, 2, 512, 64])]; tensor attention_8_key_cache_squeeze_mask_0 = const()[name = string("attention_8_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_8_key_cache = slice_by_index(begin = attention_8_key_cache_begin_0, end = attention_8_key_cache_end_0, squeeze_mask = attention_8_key_cache_squeeze_mask_0, x = coreml_update_state_16)[name = string("attention_8_key_cache")]; int32 attention_8_key_cache_head_axis_0 = const()[name = string("attention_8_key_cache_head_axis_0"), val = int32(1)]; int32 attention_8_key_cache_head_num_splits_0 = const()[name = string("attention_8_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_8_key_cache_head_0, tensor attention_8_key_cache_head_1 = split(axis = attention_8_key_cache_head_axis_0, 
num_splits = attention_8_key_cache_head_num_splits_0, x = attention_8_key_cache)[name = string("attention_8_key_cache_head")]; tensor attention_8_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_8_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_8_updated_value_cache_0 = slice_update(begin = attention_8_update_begin_0, end = attention_8_update_end_0, squeeze_mask = attention_8_updated_value_cache_0_squeeze_mask_0, update = attention_8_split_qkv_heads_2, x = coreml_update_state_15)[name = string("attention_8_updated_value_cache_0")]; write_state(data = attention_8_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_17_write_state")]; tensor coreml_update_state_17 = read_state(input = value_cache_state)[name = string("coreml_update_state_17")]; tensor attention_8_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_8_slice_current_layer_value_cache_begin_0"), val = tensor([8, 0, 0, 0])]; tensor attention_8_slice_current_layer_value_cache_end_0 = const()[name = string("attention_8_slice_current_layer_value_cache_end_0"), val = tensor([9, 2, 512, 64])]; tensor attention_8_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_8_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_8_slice_current_layer_value_cache = slice_by_index(begin = attention_8_slice_current_layer_value_cache_begin_0, end = attention_8_slice_current_layer_value_cache_end_0, squeeze_mask = attention_8_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_17)[name = string("attention_8_slice_current_layer_value_cache")]; int32 attention_8_slice_value_cache_heads_axis_0 = const()[name = string("attention_8_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_8_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_8_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_8_slice_value_cache_heads_0, tensor attention_8_slice_value_cache_heads_1 = split(axis = attention_8_slice_value_cache_heads_axis_0, num_splits = attention_8_slice_value_cache_heads_num_splits_0, x = attention_8_slice_current_layer_value_cache)[name = string("attention_8_slice_value_cache_heads")]; bool attention_8_scores_0_transpose_y_0 = const()[name = string("attention_8_scores_0_transpose_y_0"), val = bool(true)]; bool attention_8_scores_0_transpose_x_0 = const()[name = string("attention_8_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_8_scores_0 = matmul(transpose_x = attention_8_scores_0_transpose_x_0, transpose_y = attention_8_scores_0_transpose_y_0, x = attention_8_key_cache_head_0, y = attention_8_q_splits_0)[name = string("attention_8_scores_0")]; fp16 attention_8_scaled_scores_0_y_0 = const()[name = string("attention_8_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_8_scaled_scores_0 = mul(x = attention_8_scores_0, y = attention_8_scaled_scores_0_y_0)[name = string("attention_8_scaled_scores_0")]; tensor attention_8_masked_scaled_scores_0 = add(x = attention_8_scaled_scores_0, y = transpose_0)[name = string("attention_8_masked_scaled_scores_0")]; int32 softmax_16_axis_0 = const()[name = string("softmax_16_axis_0"), val = int32(-2)]; tensor softmax_16 = softmax(axis = softmax_16_axis_0, x = attention_8_masked_scaled_scores_0)[name = string("softmax_16")]; bool attention_8_attention_0_transpose_x_0 = const()[name = 
string("attention_8_attention_0_transpose_x_0"), val = bool(true)]; bool attention_8_attention_0_transpose_y_0 = const()[name = string("attention_8_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_8_attention_0 = matmul(transpose_x = attention_8_attention_0_transpose_x_0, transpose_y = attention_8_attention_0_transpose_y_0, x = softmax_16, y = attention_8_slice_value_cache_heads_0)[name = string("attention_8_attention_0")]; bool attention_8_scores_1_transpose_y_0 = const()[name = string("attention_8_scores_1_transpose_y_0"), val = bool(true)]; bool attention_8_scores_1_transpose_x_0 = const()[name = string("attention_8_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_8_scores_1 = matmul(transpose_x = attention_8_scores_1_transpose_x_0, transpose_y = attention_8_scores_1_transpose_y_0, x = attention_8_key_cache_head_1, y = attention_8_q_splits_1)[name = string("attention_8_scores_1")]; fp16 attention_8_scaled_scores_1_y_0 = const()[name = string("attention_8_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_8_scaled_scores_1 = mul(x = attention_8_scores_1, y = attention_8_scaled_scores_1_y_0)[name = string("attention_8_scaled_scores_1")]; tensor attention_8_masked_scaled_scores_1 = add(x = attention_8_scaled_scores_1, y = transpose_0)[name = string("attention_8_masked_scaled_scores_1")]; int32 softmax_17_axis_0 = const()[name = string("softmax_17_axis_0"), val = int32(-2)]; tensor softmax_17 = softmax(axis = softmax_17_axis_0, x = attention_8_masked_scaled_scores_1)[name = string("softmax_17")]; bool attention_8_attention_1_transpose_x_0 = const()[name = string("attention_8_attention_1_transpose_x_0"), val = bool(true)]; bool attention_8_attention_1_transpose_y_0 = const()[name = string("attention_8_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_8_attention_1 = matmul(transpose_x = attention_8_attention_1_transpose_x_0, transpose_y = attention_8_attention_1_transpose_y_0, x = softmax_17, y = attention_8_slice_value_cache_heads_1)[name = string("attention_8_attention_1")]; int32 attention_8_concat_attention_all_heads_axis_0 = const()[name = string("attention_8_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_8_concat_attention_all_heads_interleave_0 = const()[name = string("attention_8_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_8_concat_attention_all_heads = concat(axis = attention_8_concat_attention_all_heads_axis_0, interleave = attention_8_concat_attention_all_heads_interleave_0, values = (attention_8_attention_0, attention_8_attention_1))[name = string("attention_8_concat_attention_all_heads")]; tensor attention_8_channels_first_retransposed_perm_0 = const()[name = string("attention_8_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_8_reshaped_shape_0 = const()[name = string("attention_8_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_8_channels_first_retransposed = transpose(perm = attention_8_channels_first_retransposed_perm_0, x = attention_8_concat_attention_all_heads)[name = string("transpose_31")]; tensor attention_8_reshaped = reshape(shape = attention_8_reshaped_shape_0, x = attention_8_channels_first_retransposed)[name = string("attention_8_reshaped")]; tensor attention_8_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384907776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(385509952))))[name = string("attention_8_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_36 = constexpr_blockwise_shift_scale(data = attention_8_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385538688))))[name = string("constexpr_blockwise_shift_scale_36")]; tensor attention_8_outproj_strides_0 = const()[name = string("attention_8_outproj_strides_0"), val = tensor([1])]; string attention_8_outproj_pad_type_0 = const()[name = string("attention_8_outproj_pad_type_0"), val = string("valid")]; tensor attention_8_outproj_pad_0 = const()[name = string("attention_8_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_8_outproj_dilations_0 = const()[name = string("attention_8_outproj_dilations_0"), val = tensor([1])]; int32 attention_8_outproj_groups_0 = const()[name = string("attention_8_outproj_groups_0"), val = int32(1)]; tensor attention_8_outproj = conv(dilations = attention_8_outproj_dilations_0, groups = attention_8_outproj_groups_0, pad = attention_8_outproj_pad_0, pad_type = attention_8_outproj_pad_type_0, strides = attention_8_outproj_strides_0, weight = constexpr_blockwise_shift_scale_36, x = attention_8_reshaped)[name = string("attention_8_outproj")]; tensor block_8_residual_1 = add(x = block_7_residual_2, y = attention_8_outproj)[name = string("block_8_residual_1")]; tensor block_8_ffn_rmsnorm_abs = abs(x = block_8_residual_1)[name = string("block_8_ffn_rmsnorm_abs")]; tensor block_8_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_8_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_8_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_8_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_8_ffn_rmsnorm_maxval = reduce_max(axes = block_8_ffn_rmsnorm_maxval_axes_0, keep_dims = block_8_ffn_rmsnorm_maxval_keep_dims_0, x = block_8_ffn_rmsnorm_abs)[name = string("block_8_ffn_rmsnorm_maxval")]; fp16 block_8_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_8_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_8_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_8_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_8_ffn_rmsnorm_maxval_clipped = clip(alpha = block_8_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_8_ffn_rmsnorm_maxval_clipped_beta_0, x = block_8_ffn_rmsnorm_maxval)[name = string("block_8_ffn_rmsnorm_maxval_clipped")]; tensor block_8_ffn_rmsnorm_scaled = real_div(x = block_8_residual_1, y = block_8_ffn_rmsnorm_maxval_clipped)[name = string("block_8_ffn_rmsnorm_scaled")]; tensor block_8_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_8_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_8_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_8_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_8_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_8_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_8_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_8_ffn_rmsnorm_scaled)[name = string("block_8_ffn_rmsnorm_squared_sum")]; fp16 block_8_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_8_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_8_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_8_ffn_rmsnorm_rsqrt_epsilon_0, x = block_8_ffn_rmsnorm_squared_sum)[name = string("block_8_ffn_rmsnorm_rsqrt")]; fp16 block_8_ffn_rmsnorm_dim_scaled_y_0 = const()[name = 
string("block_8_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_8_ffn_rmsnorm_dim_scaled = mul(x = block_8_ffn_rmsnorm_scaled, y = block_8_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_8_ffn_rmsnorm_dim_scaled")]; tensor block_8_ffn_rmsnorm_normalized = mul(x = block_8_ffn_rmsnorm_dim_scaled, y = block_8_ffn_rmsnorm_rsqrt)[name = string("block_8_ffn_rmsnorm_normalized")]; tensor block_8_ffn_rmsnorm_y_0 = const()[name = string("block_8_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385540544)))]; tensor block_8_ffn_rmsnorm = mul(x = block_8_ffn_rmsnorm_normalized, y = block_8_ffn_rmsnorm_y_0)[name = string("block_8_ffn_rmsnorm")]; tensor block_8_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385542400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388811072))))[name = string("block_8_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_37 = constexpr_blockwise_shift_scale(data = block_8_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388966784))))[name = string("constexpr_blockwise_shift_scale_37")]; tensor block_8_ffn_inproj_strides_0 = const()[name = string("block_8_ffn_inproj_strides_0"), val = tensor([1])]; string block_8_ffn_inproj_pad_type_0 = const()[name = string("block_8_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_8_ffn_inproj_pad_0 = const()[name = string("block_8_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_8_ffn_inproj_dilations_0 = const()[name = string("block_8_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_8_ffn_inproj_groups_0 = const()[name = string("block_8_ffn_inproj_groups_0"), val = int32(1)]; tensor block_8_ffn_inproj = conv(dilations = block_8_ffn_inproj_dilations_0, groups = block_8_ffn_inproj_groups_0, pad = block_8_ffn_inproj_pad_0, pad_type = block_8_ffn_inproj_pad_type_0, strides = block_8_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_37, x = block_8_ffn_rmsnorm)[name = string("block_8_ffn_inproj")]; tensor block_8_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388976576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392245248))))[name = string("block_8_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_38 = constexpr_blockwise_shift_scale(data = block_8_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392400960))))[name = string("constexpr_blockwise_shift_scale_38")]; tensor block_8_ffn_g_strides_0 = const()[name = string("block_8_ffn_g_strides_0"), val = tensor([1])]; string block_8_ffn_g_pad_type_0 = const()[name = string("block_8_ffn_g_pad_type_0"), val = string("valid")]; tensor block_8_ffn_g_pad_0 = const()[name = string("block_8_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_8_ffn_g_dilations_0 = const()[name = string("block_8_ffn_g_dilations_0"), val = tensor([1])]; int32 block_8_ffn_g_groups_0 = const()[name = string("block_8_ffn_g_groups_0"), val = int32(1)]; tensor block_8_ffn_g = conv(dilations = block_8_ffn_g_dilations_0, groups = block_8_ffn_g_groups_0, pad = block_8_ffn_g_pad_0, pad_type = block_8_ffn_g_pad_type_0, strides = 
block_8_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_38, x = block_8_ffn_rmsnorm)[name = string("block_8_ffn_g")]; tensor block_8_ffn_g_activation = silu(x = block_8_ffn_g)[name = string("block_8_ffn_g_activation")]; tensor block_8_ffn_x_gated = mul(x = block_8_ffn_inproj, y = block_8_ffn_g_activation)[name = string("block_8_ffn_x_gated")]; tensor block_8_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392410752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395679424))))[name = string("block_8_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_39 = constexpr_blockwise_shift_scale(data = block_8_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395708160))))[name = string("constexpr_blockwise_shift_scale_39")]; tensor block_8_ffn_outproj_strides_0 = const()[name = string("block_8_ffn_outproj_strides_0"), val = tensor([1])]; string block_8_ffn_outproj_pad_type_0 = const()[name = string("block_8_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_8_ffn_outproj_pad_0 = const()[name = string("block_8_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_8_ffn_outproj_dilations_0 = const()[name = string("block_8_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_8_ffn_outproj_groups_0 = const()[name = string("block_8_ffn_outproj_groups_0"), val = int32(1)]; tensor block_8_ffn_outproj = conv(dilations = block_8_ffn_outproj_dilations_0, groups = block_8_ffn_outproj_groups_0, pad = block_8_ffn_outproj_pad_0, pad_type = block_8_ffn_outproj_pad_type_0, strides = block_8_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_39, x = block_8_ffn_x_gated)[name = string("block_8_ffn_outproj")]; tensor block_8_residual_2 = add(x = block_8_ffn_outproj, y = block_8_residual_1)[name = string("block_8_residual_2")]; tensor block_9_attention_rmsnorm_abs = abs(x = block_8_residual_2)[name = string("block_9_attention_rmsnorm_abs")]; tensor block_9_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_9_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_9_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_9_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_9_attention_rmsnorm_maxval = reduce_max(axes = block_9_attention_rmsnorm_maxval_axes_0, keep_dims = block_9_attention_rmsnorm_maxval_keep_dims_0, x = block_9_attention_rmsnorm_abs)[name = string("block_9_attention_rmsnorm_maxval")]; fp16 block_9_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_9_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_9_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_9_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_9_attention_rmsnorm_maxval_clipped = clip(alpha = block_9_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_9_attention_rmsnorm_maxval_clipped_beta_0, x = block_9_attention_rmsnorm_maxval)[name = string("block_9_attention_rmsnorm_maxval_clipped")]; tensor block_9_attention_rmsnorm_scaled = real_div(x = block_8_residual_2, y = block_9_attention_rmsnorm_maxval_clipped)[name = string("block_9_attention_rmsnorm_scaled")]; tensor block_9_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_9_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool 
block_9_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_9_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_9_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_9_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_9_attention_rmsnorm_squared_sum_keep_dims_0, x = block_9_attention_rmsnorm_scaled)[name = string("block_9_attention_rmsnorm_squared_sum")]; fp16 block_9_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_9_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_9_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_9_attention_rmsnorm_rsqrt_epsilon_0, x = block_9_attention_rmsnorm_squared_sum)[name = string("block_9_attention_rmsnorm_rsqrt")]; fp16 block_9_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_9_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_9_attention_rmsnorm_dim_scaled = mul(x = block_9_attention_rmsnorm_scaled, y = block_9_attention_rmsnorm_dim_scaled_y_0)[name = string("block_9_attention_rmsnorm_dim_scaled")]; tensor block_9_attention_rmsnorm_normalized = mul(x = block_9_attention_rmsnorm_dim_scaled, y = block_9_attention_rmsnorm_rsqrt)[name = string("block_9_attention_rmsnorm_normalized")]; tensor block_9_attention_rmsnorm_y_0 = const()[name = string("block_9_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395710016)))]; tensor block_9_attention_rmsnorm = mul(x = block_9_attention_rmsnorm_normalized, y = block_9_attention_rmsnorm_y_0)[name = string("block_9_attention_rmsnorm")]; tensor attention_9_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395711872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396486080))))[name = string("attention_9_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_40 = constexpr_blockwise_shift_scale(data = attention_9_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396523008))))[name = string("constexpr_blockwise_shift_scale_40")]; tensor attention_9_qkvproj_bias_0 = const()[name = string("attention_9_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396525376)))]; tensor attention_9_qkvproj_strides_0 = const()[name = string("attention_9_qkvproj_strides_0"), val = tensor([1])]; string attention_9_qkvproj_pad_type_0 = const()[name = string("attention_9_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_9_qkvproj_pad_0 = const()[name = string("attention_9_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_9_qkvproj_dilations_0 = const()[name = string("attention_9_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_9_qkvproj_groups_0 = const()[name = string("attention_9_qkvproj_groups_0"), val = int32(1)]; tensor attention_9_qkvproj = conv(bias = attention_9_qkvproj_bias_0, dilations = attention_9_qkvproj_dilations_0, groups = attention_9_qkvproj_groups_0, pad = attention_9_qkvproj_pad_0, pad_type = attention_9_qkvproj_pad_type_0, strides = attention_9_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_40, x = block_9_attention_rmsnorm)[name = string("attention_9_qkvproj")]; tensor attention_9_head_reshape_shape_0 = const()[name = string("attention_9_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor 
attention_9_head_reshape = reshape(shape = attention_9_head_reshape_shape_0, x = attention_9_qkvproj)[name = string("attention_9_head_reshape")]; tensor attention_9_head_transpose_perm_0 = const()[name = string("attention_9_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_9_split_qkv_heads_axis_0 = const()[name = string("attention_9_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_9_split_qkv_heads_split_sizes_0 = const()[name = string("attention_9_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_9_head_transpose = transpose(perm = attention_9_head_transpose_perm_0, x = attention_9_head_reshape)[name = string("transpose_30")]; tensor attention_9_split_qkv_heads_0, tensor attention_9_split_qkv_heads_1, tensor attention_9_split_qkv_heads_2 = split(axis = attention_9_split_qkv_heads_axis_0, split_sizes = attention_9_split_qkv_heads_split_sizes_0, x = attention_9_head_transpose)[name = string("attention_9_split_qkv_heads")]; tensor attention_9_q_rope_lhs_mult = mul(x = attention_9_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_9_q_rope_lhs_mult")]; int32 attention_9_q_rotate_half_split_num_splits_0 = const()[name = string("attention_9_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_9_q_rotate_half_split_axis_0 = const()[name = string("attention_9_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_9_q_rotate_half_split_0, tensor attention_9_q_rotate_half_split_1 = split(axis = attention_9_q_rotate_half_split_axis_0, num_splits = attention_9_q_rotate_half_split_num_splits_0, x = attention_9_split_qkv_heads_0)[name = string("attention_9_q_rotate_half_split")]; fp16 attention_9_q_rotate_half_neg_y_0 = const()[name = string("attention_9_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_9_q_rotate_half_neg = mul(x = attention_9_q_rotate_half_split_1, y = attention_9_q_rotate_half_neg_y_0)[name = string("attention_9_q_rotate_half_neg")]; int32 attention_9_q_rotate_half_concat_axis_0 = const()[name = string("attention_9_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_9_q_rotate_half_concat_interleave_0 = const()[name = string("attention_9_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_9_q_rotate_half_concat = concat(axis = attention_9_q_rotate_half_concat_axis_0, interleave = attention_9_q_rotate_half_concat_interleave_0, values = (attention_9_q_rotate_half_neg, attention_9_q_rotate_half_split_0))[name = string("attention_9_q_rotate_half_concat")]; tensor attention_9_q_rope_rhs_mult = mul(x = attention_9_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_9_q_rope_rhs_mult")]; tensor attention_9_q_rope = add(x = attention_9_q_rope_lhs_mult, y = attention_9_q_rope_rhs_mult)[name = string("attention_9_q_rope")]; tensor attention_9_k_rope_lhs_mult = mul(x = attention_9_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_9_k_rope_lhs_mult")]; int32 attention_9_k_rotate_half_split_num_splits_0 = const()[name = string("attention_9_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_9_k_rotate_half_split_axis_0 = const()[name = string("attention_9_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_9_k_rotate_half_split_0, tensor attention_9_k_rotate_half_split_1 = split(axis = attention_9_k_rotate_half_split_axis_0, num_splits = attention_9_k_rotate_half_split_num_splits_0, x = attention_9_split_qkv_heads_1)[name = string("attention_9_k_rotate_half_split")]; fp16 
attention_9_k_rotate_half_neg_y_0 = const()[name = string("attention_9_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_9_k_rotate_half_neg = mul(x = attention_9_k_rotate_half_split_1, y = attention_9_k_rotate_half_neg_y_0)[name = string("attention_9_k_rotate_half_neg")]; int32 attention_9_k_rotate_half_concat_axis_0 = const()[name = string("attention_9_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_9_k_rotate_half_concat_interleave_0 = const()[name = string("attention_9_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_9_k_rotate_half_concat = concat(axis = attention_9_k_rotate_half_concat_axis_0, interleave = attention_9_k_rotate_half_concat_interleave_0, values = (attention_9_k_rotate_half_neg, attention_9_k_rotate_half_split_0))[name = string("attention_9_k_rotate_half_concat")]; tensor attention_9_k_rope_rhs_mult = mul(x = attention_9_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_9_k_rope_rhs_mult")]; tensor attention_9_k_rope = add(x = attention_9_k_rope_lhs_mult, y = attention_9_k_rope_rhs_mult)[name = string("attention_9_k_rope")]; int32 attention_9_q_splits_axis_0 = const()[name = string("attention_9_q_splits_axis_0"), val = int32(1)]; int32 attention_9_q_splits_num_splits_0 = const()[name = string("attention_9_q_splits_num_splits_0"), val = int32(2)]; tensor attention_9_q_splits_0, tensor attention_9_q_splits_1 = split(axis = attention_9_q_splits_axis_0, num_splits = attention_9_q_splits_num_splits_0, x = attention_9_q_rope)[name = string("attention_9_q_splits")]; tensor attention_9_update_begin_0_values0_0 = const()[name = string("attention_9_update_begin_0_values0_0"), val = tensor([9])]; tensor attention_9_update_begin_0_values1_0 = const()[name = string("attention_9_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_9_update_begin_0_values3_0 = const()[name = string("attention_9_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_9_update_begin_0_axis_0 = const()[name = string("attention_9_update_begin_0_axis_0"), val = int32(0)]; bool attention_9_update_begin_0_interleave_0 = const()[name = string("attention_9_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_9_update_begin_0 = concat(axis = attention_9_update_begin_0_axis_0, interleave = attention_9_update_begin_0_interleave_0, values = (attention_9_update_begin_0_values0_0, attention_9_update_begin_0_values1_0, query_pos1, attention_9_update_begin_0_values3_0))[name = string("attention_9_update_begin_0")]; tensor attention_9_update_end_0_values0_0 = const()[name = string("attention_9_update_end_0_values0_0"), val = tensor([10])]; tensor attention_9_update_end_0_values1_0 = const()[name = string("attention_9_update_end_0_values1_0"), val = tensor([2])]; tensor attention_9_update_end_0_values3_0 = const()[name = string("attention_9_update_end_0_values3_0"), val = tensor([64])]; int32 attention_9_update_end_0_axis_0 = const()[name = string("attention_9_update_end_0_axis_0"), val = int32(0)]; bool attention_9_update_end_0_interleave_0 = const()[name = string("attention_9_update_end_0_interleave_0"), val = bool(false)]; tensor attention_9_update_end_0 = concat(axis = attention_9_update_end_0_axis_0, interleave = attention_9_update_end_0_interleave_0, values = (attention_9_update_end_0_values0_0, attention_9_update_end_0_values1_0, end_pos_0, attention_9_update_end_0_values3_0))[name = string("attention_9_update_end_0")]; tensor attention_9_updated_key_cache_0_squeeze_mask_0 = const()[name = 
string("attention_9_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_9_updated_key_cache_0 = slice_update(begin = attention_9_update_begin_0, end = attention_9_update_end_0, squeeze_mask = attention_9_updated_key_cache_0_squeeze_mask_0, update = attention_9_k_rope, x = coreml_update_state_16)[name = string("attention_9_updated_key_cache_0")]; write_state(data = attention_9_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_18_write_state")]; tensor coreml_update_state_18 = read_state(input = key_cache_state)[name = string("coreml_update_state_18")]; tensor attention_9_key_cache_begin_0 = const()[name = string("attention_9_key_cache_begin_0"), val = tensor([9, 0, 0, 0])]; tensor attention_9_key_cache_end_0 = const()[name = string("attention_9_key_cache_end_0"), val = tensor([10, 2, 512, 64])]; tensor attention_9_key_cache_squeeze_mask_0 = const()[name = string("attention_9_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_9_key_cache = slice_by_index(begin = attention_9_key_cache_begin_0, end = attention_9_key_cache_end_0, squeeze_mask = attention_9_key_cache_squeeze_mask_0, x = coreml_update_state_18)[name = string("attention_9_key_cache")]; int32 attention_9_key_cache_head_axis_0 = const()[name = string("attention_9_key_cache_head_axis_0"), val = int32(1)]; int32 attention_9_key_cache_head_num_splits_0 = const()[name = string("attention_9_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_9_key_cache_head_0, tensor attention_9_key_cache_head_1 = split(axis = attention_9_key_cache_head_axis_0, num_splits = attention_9_key_cache_head_num_splits_0, x = attention_9_key_cache)[name = string("attention_9_key_cache_head")]; tensor attention_9_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_9_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_9_updated_value_cache_0 = slice_update(begin = attention_9_update_begin_0, end = attention_9_update_end_0, squeeze_mask = attention_9_updated_value_cache_0_squeeze_mask_0, update = attention_9_split_qkv_heads_2, x = coreml_update_state_17)[name = string("attention_9_updated_value_cache_0")]; write_state(data = attention_9_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_19_write_state")]; tensor coreml_update_state_19 = read_state(input = value_cache_state)[name = string("coreml_update_state_19")]; tensor attention_9_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_9_slice_current_layer_value_cache_begin_0"), val = tensor([9, 0, 0, 0])]; tensor attention_9_slice_current_layer_value_cache_end_0 = const()[name = string("attention_9_slice_current_layer_value_cache_end_0"), val = tensor([10, 2, 512, 64])]; tensor attention_9_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_9_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_9_slice_current_layer_value_cache = slice_by_index(begin = attention_9_slice_current_layer_value_cache_begin_0, end = attention_9_slice_current_layer_value_cache_end_0, squeeze_mask = attention_9_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_19)[name = string("attention_9_slice_current_layer_value_cache")]; int32 attention_9_slice_value_cache_heads_axis_0 = const()[name = string("attention_9_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 
attention_9_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_9_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_9_slice_value_cache_heads_0, tensor attention_9_slice_value_cache_heads_1 = split(axis = attention_9_slice_value_cache_heads_axis_0, num_splits = attention_9_slice_value_cache_heads_num_splits_0, x = attention_9_slice_current_layer_value_cache)[name = string("attention_9_slice_value_cache_heads")]; bool attention_9_scores_0_transpose_y_0 = const()[name = string("attention_9_scores_0_transpose_y_0"), val = bool(true)]; bool attention_9_scores_0_transpose_x_0 = const()[name = string("attention_9_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_9_scores_0 = matmul(transpose_x = attention_9_scores_0_transpose_x_0, transpose_y = attention_9_scores_0_transpose_y_0, x = attention_9_key_cache_head_0, y = attention_9_q_splits_0)[name = string("attention_9_scores_0")]; fp16 attention_9_scaled_scores_0_y_0 = const()[name = string("attention_9_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_9_scaled_scores_0 = mul(x = attention_9_scores_0, y = attention_9_scaled_scores_0_y_0)[name = string("attention_9_scaled_scores_0")]; tensor attention_9_masked_scaled_scores_0 = add(x = attention_9_scaled_scores_0, y = transpose_0)[name = string("attention_9_masked_scaled_scores_0")]; int32 softmax_18_axis_0 = const()[name = string("softmax_18_axis_0"), val = int32(-2)]; tensor softmax_18 = softmax(axis = softmax_18_axis_0, x = attention_9_masked_scaled_scores_0)[name = string("softmax_18")]; bool attention_9_attention_0_transpose_x_0 = const()[name = string("attention_9_attention_0_transpose_x_0"), val = bool(true)]; bool attention_9_attention_0_transpose_y_0 = const()[name = string("attention_9_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_9_attention_0 = matmul(transpose_x = attention_9_attention_0_transpose_x_0, transpose_y = attention_9_attention_0_transpose_y_0, x = softmax_18, y = attention_9_slice_value_cache_heads_0)[name = string("attention_9_attention_0")]; bool attention_9_scores_1_transpose_y_0 = const()[name = string("attention_9_scores_1_transpose_y_0"), val = bool(true)]; bool attention_9_scores_1_transpose_x_0 = const()[name = string("attention_9_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_9_scores_1 = matmul(transpose_x = attention_9_scores_1_transpose_x_0, transpose_y = attention_9_scores_1_transpose_y_0, x = attention_9_key_cache_head_1, y = attention_9_q_splits_1)[name = string("attention_9_scores_1")]; fp16 attention_9_scaled_scores_1_y_0 = const()[name = string("attention_9_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_9_scaled_scores_1 = mul(x = attention_9_scores_1, y = attention_9_scaled_scores_1_y_0)[name = string("attention_9_scaled_scores_1")]; tensor attention_9_masked_scaled_scores_1 = add(x = attention_9_scaled_scores_1, y = transpose_0)[name = string("attention_9_masked_scaled_scores_1")]; int32 softmax_19_axis_0 = const()[name = string("softmax_19_axis_0"), val = int32(-2)]; tensor softmax_19 = softmax(axis = softmax_19_axis_0, x = attention_9_masked_scaled_scores_1)[name = string("softmax_19")]; bool attention_9_attention_1_transpose_x_0 = const()[name = string("attention_9_attention_1_transpose_x_0"), val = bool(true)]; bool attention_9_attention_1_transpose_y_0 = const()[name = string("attention_9_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_9_attention_1 = matmul(transpose_x = attention_9_attention_1_transpose_x_0, 
transpose_y = attention_9_attention_1_transpose_y_0, x = softmax_19, y = attention_9_slice_value_cache_heads_1)[name = string("attention_9_attention_1")]; int32 attention_9_concat_attention_all_heads_axis_0 = const()[name = string("attention_9_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_9_concat_attention_all_heads_interleave_0 = const()[name = string("attention_9_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_9_concat_attention_all_heads = concat(axis = attention_9_concat_attention_all_heads_axis_0, interleave = attention_9_concat_attention_all_heads_interleave_0, values = (attention_9_attention_0, attention_9_attention_1))[name = string("attention_9_concat_attention_all_heads")]; tensor attention_9_channels_first_retransposed_perm_0 = const()[name = string("attention_9_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_9_reshaped_shape_0 = const()[name = string("attention_9_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_9_channels_first_retransposed = transpose(perm = attention_9_channels_first_retransposed_perm_0, x = attention_9_concat_attention_all_heads)[name = string("transpose_29")]; tensor attention_9_reshaped = reshape(shape = attention_9_reshaped_shape_0, x = attention_9_channels_first_retransposed)[name = string("attention_9_reshaped")]; tensor attention_9_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396527744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397129920))))[name = string("attention_9_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_41 = constexpr_blockwise_shift_scale(data = attention_9_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397158656))))[name = string("constexpr_blockwise_shift_scale_41")]; tensor attention_9_outproj_strides_0 = const()[name = string("attention_9_outproj_strides_0"), val = tensor([1])]; string attention_9_outproj_pad_type_0 = const()[name = string("attention_9_outproj_pad_type_0"), val = string("valid")]; tensor attention_9_outproj_pad_0 = const()[name = string("attention_9_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_9_outproj_dilations_0 = const()[name = string("attention_9_outproj_dilations_0"), val = tensor([1])]; int32 attention_9_outproj_groups_0 = const()[name = string("attention_9_outproj_groups_0"), val = int32(1)]; tensor attention_9_outproj = conv(dilations = attention_9_outproj_dilations_0, groups = attention_9_outproj_groups_0, pad = attention_9_outproj_pad_0, pad_type = attention_9_outproj_pad_type_0, strides = attention_9_outproj_strides_0, weight = constexpr_blockwise_shift_scale_41, x = attention_9_reshaped)[name = string("attention_9_outproj")]; tensor block_9_residual_1 = add(x = block_8_residual_2, y = attention_9_outproj)[name = string("block_9_residual_1")]; tensor block_9_ffn_rmsnorm_abs = abs(x = block_9_residual_1)[name = string("block_9_ffn_rmsnorm_abs")]; tensor block_9_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_9_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_9_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_9_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_9_ffn_rmsnorm_maxval = reduce_max(axes = block_9_ffn_rmsnorm_maxval_axes_0, keep_dims = block_9_ffn_rmsnorm_maxval_keep_dims_0, x = 
block_9_ffn_rmsnorm_abs)[name = string("block_9_ffn_rmsnorm_maxval")]; fp16 block_9_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_9_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_9_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_9_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_9_ffn_rmsnorm_maxval_clipped = clip(alpha = block_9_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_9_ffn_rmsnorm_maxval_clipped_beta_0, x = block_9_ffn_rmsnorm_maxval)[name = string("block_9_ffn_rmsnorm_maxval_clipped")]; tensor block_9_ffn_rmsnorm_scaled = real_div(x = block_9_residual_1, y = block_9_ffn_rmsnorm_maxval_clipped)[name = string("block_9_ffn_rmsnorm_scaled")]; tensor block_9_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_9_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_9_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_9_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_9_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_9_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_9_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_9_ffn_rmsnorm_scaled)[name = string("block_9_ffn_rmsnorm_squared_sum")]; fp16 block_9_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_9_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_9_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_9_ffn_rmsnorm_rsqrt_epsilon_0, x = block_9_ffn_rmsnorm_squared_sum)[name = string("block_9_ffn_rmsnorm_rsqrt")]; fp16 block_9_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_9_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_9_ffn_rmsnorm_dim_scaled = mul(x = block_9_ffn_rmsnorm_scaled, y = block_9_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_9_ffn_rmsnorm_dim_scaled")]; tensor block_9_ffn_rmsnorm_normalized = mul(x = block_9_ffn_rmsnorm_dim_scaled, y = block_9_ffn_rmsnorm_rsqrt)[name = string("block_9_ffn_rmsnorm_normalized")]; tensor block_9_ffn_rmsnorm_y_0 = const()[name = string("block_9_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397160512)))]; tensor block_9_ffn_rmsnorm = mul(x = block_9_ffn_rmsnorm_normalized, y = block_9_ffn_rmsnorm_y_0)[name = string("block_9_ffn_rmsnorm")]; tensor block_9_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397162368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400431040))))[name = string("block_9_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_42 = constexpr_blockwise_shift_scale(data = block_9_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400586752))))[name = string("constexpr_blockwise_shift_scale_42")]; tensor block_9_ffn_inproj_strides_0 = const()[name = string("block_9_ffn_inproj_strides_0"), val = tensor([1])]; string block_9_ffn_inproj_pad_type_0 = const()[name = string("block_9_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_9_ffn_inproj_pad_0 = const()[name = string("block_9_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_9_ffn_inproj_dilations_0 = const()[name = string("block_9_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_9_ffn_inproj_groups_0 = const()[name = string("block_9_ffn_inproj_groups_0"), val = int32(1)]; tensor block_9_ffn_inproj = 
conv(dilations = block_9_ffn_inproj_dilations_0, groups = block_9_ffn_inproj_groups_0, pad = block_9_ffn_inproj_pad_0, pad_type = block_9_ffn_inproj_pad_type_0, strides = block_9_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_42, x = block_9_ffn_rmsnorm)[name = string("block_9_ffn_inproj")]; tensor block_9_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400596544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403865216))))[name = string("block_9_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_43 = constexpr_blockwise_shift_scale(data = block_9_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404020928))))[name = string("constexpr_blockwise_shift_scale_43")]; tensor block_9_ffn_g_strides_0 = const()[name = string("block_9_ffn_g_strides_0"), val = tensor([1])]; string block_9_ffn_g_pad_type_0 = const()[name = string("block_9_ffn_g_pad_type_0"), val = string("valid")]; tensor block_9_ffn_g_pad_0 = const()[name = string("block_9_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_9_ffn_g_dilations_0 = const()[name = string("block_9_ffn_g_dilations_0"), val = tensor([1])]; int32 block_9_ffn_g_groups_0 = const()[name = string("block_9_ffn_g_groups_0"), val = int32(1)]; tensor block_9_ffn_g = conv(dilations = block_9_ffn_g_dilations_0, groups = block_9_ffn_g_groups_0, pad = block_9_ffn_g_pad_0, pad_type = block_9_ffn_g_pad_type_0, strides = block_9_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_43, x = block_9_ffn_rmsnorm)[name = string("block_9_ffn_g")]; tensor block_9_ffn_g_activation = silu(x = block_9_ffn_g)[name = string("block_9_ffn_g_activation")]; tensor block_9_ffn_x_gated = mul(x = block_9_ffn_inproj, y = block_9_ffn_g_activation)[name = string("block_9_ffn_x_gated")]; tensor block_9_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404030720))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407299392))))[name = string("block_9_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_44 = constexpr_blockwise_shift_scale(data = block_9_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407328128))))[name = string("constexpr_blockwise_shift_scale_44")]; tensor block_9_ffn_outproj_strides_0 = const()[name = string("block_9_ffn_outproj_strides_0"), val = tensor([1])]; string block_9_ffn_outproj_pad_type_0 = const()[name = string("block_9_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_9_ffn_outproj_pad_0 = const()[name = string("block_9_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_9_ffn_outproj_dilations_0 = const()[name = string("block_9_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_9_ffn_outproj_groups_0 = const()[name = string("block_9_ffn_outproj_groups_0"), val = int32(1)]; tensor block_9_ffn_outproj = conv(dilations = block_9_ffn_outproj_dilations_0, groups = block_9_ffn_outproj_groups_0, pad = block_9_ffn_outproj_pad_0, pad_type = block_9_ffn_outproj_pad_type_0, strides = block_9_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_44, x = block_9_ffn_x_gated)[name = string("block_9_ffn_outproj")]; tensor block_9_residual_2 = add(x = 
block_9_ffn_outproj, y = block_9_residual_1)[name = string("block_9_residual_2")]; tensor block_10_attention_rmsnorm_abs = abs(x = block_9_residual_2)[name = string("block_10_attention_rmsnorm_abs")]; tensor block_10_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_10_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_10_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_10_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_10_attention_rmsnorm_maxval = reduce_max(axes = block_10_attention_rmsnorm_maxval_axes_0, keep_dims = block_10_attention_rmsnorm_maxval_keep_dims_0, x = block_10_attention_rmsnorm_abs)[name = string("block_10_attention_rmsnorm_maxval")]; fp16 block_10_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_10_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_10_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_10_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_10_attention_rmsnorm_maxval_clipped = clip(alpha = block_10_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_10_attention_rmsnorm_maxval_clipped_beta_0, x = block_10_attention_rmsnorm_maxval)[name = string("block_10_attention_rmsnorm_maxval_clipped")]; tensor block_10_attention_rmsnorm_scaled = real_div(x = block_9_residual_2, y = block_10_attention_rmsnorm_maxval_clipped)[name = string("block_10_attention_rmsnorm_scaled")]; tensor block_10_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_10_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_10_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_10_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_10_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_10_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_10_attention_rmsnorm_squared_sum_keep_dims_0, x = block_10_attention_rmsnorm_scaled)[name = string("block_10_attention_rmsnorm_squared_sum")]; fp16 block_10_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_10_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_10_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_10_attention_rmsnorm_rsqrt_epsilon_0, x = block_10_attention_rmsnorm_squared_sum)[name = string("block_10_attention_rmsnorm_rsqrt")]; fp16 block_10_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_10_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_10_attention_rmsnorm_dim_scaled = mul(x = block_10_attention_rmsnorm_scaled, y = block_10_attention_rmsnorm_dim_scaled_y_0)[name = string("block_10_attention_rmsnorm_dim_scaled")]; tensor block_10_attention_rmsnorm_normalized = mul(x = block_10_attention_rmsnorm_dim_scaled, y = block_10_attention_rmsnorm_rsqrt)[name = string("block_10_attention_rmsnorm_normalized")]; tensor block_10_attention_rmsnorm_y_0 = const()[name = string("block_10_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407329984)))]; tensor block_10_attention_rmsnorm = mul(x = block_10_attention_rmsnorm_normalized, y = block_10_attention_rmsnorm_y_0)[name = string("block_10_attention_rmsnorm")]; tensor attention_10_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407331840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(408106048))))[name = string("attention_10_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_45 = constexpr_blockwise_shift_scale(data = attention_10_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408142976))))[name = string("constexpr_blockwise_shift_scale_45")]; tensor attention_10_qkvproj_bias_0 = const()[name = string("attention_10_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408145344)))]; tensor attention_10_qkvproj_strides_0 = const()[name = string("attention_10_qkvproj_strides_0"), val = tensor([1])]; string attention_10_qkvproj_pad_type_0 = const()[name = string("attention_10_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_10_qkvproj_pad_0 = const()[name = string("attention_10_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_10_qkvproj_dilations_0 = const()[name = string("attention_10_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_10_qkvproj_groups_0 = const()[name = string("attention_10_qkvproj_groups_0"), val = int32(1)]; tensor attention_10_qkvproj = conv(bias = attention_10_qkvproj_bias_0, dilations = attention_10_qkvproj_dilations_0, groups = attention_10_qkvproj_groups_0, pad = attention_10_qkvproj_pad_0, pad_type = attention_10_qkvproj_pad_type_0, strides = attention_10_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_45, x = block_10_attention_rmsnorm)[name = string("attention_10_qkvproj")]; tensor attention_10_head_reshape_shape_0 = const()[name = string("attention_10_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_10_head_reshape = reshape(shape = attention_10_head_reshape_shape_0, x = attention_10_qkvproj)[name = string("attention_10_head_reshape")]; tensor attention_10_head_transpose_perm_0 = const()[name = string("attention_10_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_10_split_qkv_heads_axis_0 = const()[name = string("attention_10_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_10_split_qkv_heads_split_sizes_0 = const()[name = string("attention_10_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_10_head_transpose = transpose(perm = attention_10_head_transpose_perm_0, x = attention_10_head_reshape)[name = string("transpose_28")]; tensor attention_10_split_qkv_heads_0, tensor attention_10_split_qkv_heads_1, tensor attention_10_split_qkv_heads_2 = split(axis = attention_10_split_qkv_heads_axis_0, split_sizes = attention_10_split_qkv_heads_split_sizes_0, x = attention_10_head_transpose)[name = string("attention_10_split_qkv_heads")]; tensor attention_10_q_rope_lhs_mult = mul(x = attention_10_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_10_q_rope_lhs_mult")]; int32 attention_10_q_rotate_half_split_num_splits_0 = const()[name = string("attention_10_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_10_q_rotate_half_split_axis_0 = const()[name = string("attention_10_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_10_q_rotate_half_split_0, tensor attention_10_q_rotate_half_split_1 = split(axis = attention_10_q_rotate_half_split_axis_0, num_splits = attention_10_q_rotate_half_split_num_splits_0, x = attention_10_split_qkv_heads_0)[name = string("attention_10_q_rotate_half_split")]; fp16 attention_10_q_rotate_half_neg_y_0 = const()[name = string("attention_10_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor 
attention_10_q_rotate_half_neg = mul(x = attention_10_q_rotate_half_split_1, y = attention_10_q_rotate_half_neg_y_0)[name = string("attention_10_q_rotate_half_neg")]; int32 attention_10_q_rotate_half_concat_axis_0 = const()[name = string("attention_10_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_10_q_rotate_half_concat_interleave_0 = const()[name = string("attention_10_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_10_q_rotate_half_concat = concat(axis = attention_10_q_rotate_half_concat_axis_0, interleave = attention_10_q_rotate_half_concat_interleave_0, values = (attention_10_q_rotate_half_neg, attention_10_q_rotate_half_split_0))[name = string("attention_10_q_rotate_half_concat")]; tensor attention_10_q_rope_rhs_mult = mul(x = attention_10_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_10_q_rope_rhs_mult")]; tensor attention_10_q_rope = add(x = attention_10_q_rope_lhs_mult, y = attention_10_q_rope_rhs_mult)[name = string("attention_10_q_rope")]; tensor attention_10_k_rope_lhs_mult = mul(x = attention_10_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_10_k_rope_lhs_mult")]; int32 attention_10_k_rotate_half_split_num_splits_0 = const()[name = string("attention_10_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_10_k_rotate_half_split_axis_0 = const()[name = string("attention_10_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_10_k_rotate_half_split_0, tensor attention_10_k_rotate_half_split_1 = split(axis = attention_10_k_rotate_half_split_axis_0, num_splits = attention_10_k_rotate_half_split_num_splits_0, x = attention_10_split_qkv_heads_1)[name = string("attention_10_k_rotate_half_split")]; fp16 attention_10_k_rotate_half_neg_y_0 = const()[name = string("attention_10_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_10_k_rotate_half_neg = mul(x = attention_10_k_rotate_half_split_1, y = attention_10_k_rotate_half_neg_y_0)[name = string("attention_10_k_rotate_half_neg")]; int32 attention_10_k_rotate_half_concat_axis_0 = const()[name = string("attention_10_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_10_k_rotate_half_concat_interleave_0 = const()[name = string("attention_10_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_10_k_rotate_half_concat = concat(axis = attention_10_k_rotate_half_concat_axis_0, interleave = attention_10_k_rotate_half_concat_interleave_0, values = (attention_10_k_rotate_half_neg, attention_10_k_rotate_half_split_0))[name = string("attention_10_k_rotate_half_concat")]; tensor attention_10_k_rope_rhs_mult = mul(x = attention_10_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_10_k_rope_rhs_mult")]; tensor attention_10_k_rope = add(x = attention_10_k_rope_lhs_mult, y = attention_10_k_rope_rhs_mult)[name = string("attention_10_k_rope")]; int32 attention_10_q_splits_axis_0 = const()[name = string("attention_10_q_splits_axis_0"), val = int32(1)]; int32 attention_10_q_splits_num_splits_0 = const()[name = string("attention_10_q_splits_num_splits_0"), val = int32(2)]; tensor attention_10_q_splits_0, tensor attention_10_q_splits_1 = split(axis = attention_10_q_splits_axis_0, num_splits = attention_10_q_splits_num_splits_0, x = attention_10_q_rope)[name = string("attention_10_q_splits")]; tensor attention_10_update_begin_0_values0_0 = const()[name = string("attention_10_update_begin_0_values0_0"), val = tensor([10])]; tensor attention_10_update_begin_0_values1_0 = const()[name = 
string("attention_10_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_10_update_begin_0_values3_0 = const()[name = string("attention_10_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_10_update_begin_0_axis_0 = const()[name = string("attention_10_update_begin_0_axis_0"), val = int32(0)]; bool attention_10_update_begin_0_interleave_0 = const()[name = string("attention_10_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_10_update_begin_0 = concat(axis = attention_10_update_begin_0_axis_0, interleave = attention_10_update_begin_0_interleave_0, values = (attention_10_update_begin_0_values0_0, attention_10_update_begin_0_values1_0, query_pos1, attention_10_update_begin_0_values3_0))[name = string("attention_10_update_begin_0")]; tensor attention_10_update_end_0_values0_0 = const()[name = string("attention_10_update_end_0_values0_0"), val = tensor([11])]; tensor attention_10_update_end_0_values1_0 = const()[name = string("attention_10_update_end_0_values1_0"), val = tensor([2])]; tensor attention_10_update_end_0_values3_0 = const()[name = string("attention_10_update_end_0_values3_0"), val = tensor([64])]; int32 attention_10_update_end_0_axis_0 = const()[name = string("attention_10_update_end_0_axis_0"), val = int32(0)]; bool attention_10_update_end_0_interleave_0 = const()[name = string("attention_10_update_end_0_interleave_0"), val = bool(false)]; tensor attention_10_update_end_0 = concat(axis = attention_10_update_end_0_axis_0, interleave = attention_10_update_end_0_interleave_0, values = (attention_10_update_end_0_values0_0, attention_10_update_end_0_values1_0, end_pos_0, attention_10_update_end_0_values3_0))[name = string("attention_10_update_end_0")]; tensor attention_10_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_10_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_10_updated_key_cache_0 = slice_update(begin = attention_10_update_begin_0, end = attention_10_update_end_0, squeeze_mask = attention_10_updated_key_cache_0_squeeze_mask_0, update = attention_10_k_rope, x = coreml_update_state_18)[name = string("attention_10_updated_key_cache_0")]; write_state(data = attention_10_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_20_write_state")]; tensor coreml_update_state_20 = read_state(input = key_cache_state)[name = string("coreml_update_state_20")]; tensor attention_10_key_cache_begin_0 = const()[name = string("attention_10_key_cache_begin_0"), val = tensor([10, 0, 0, 0])]; tensor attention_10_key_cache_end_0 = const()[name = string("attention_10_key_cache_end_0"), val = tensor([11, 2, 512, 64])]; tensor attention_10_key_cache_squeeze_mask_0 = const()[name = string("attention_10_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_10_key_cache = slice_by_index(begin = attention_10_key_cache_begin_0, end = attention_10_key_cache_end_0, squeeze_mask = attention_10_key_cache_squeeze_mask_0, x = coreml_update_state_20)[name = string("attention_10_key_cache")]; int32 attention_10_key_cache_head_axis_0 = const()[name = string("attention_10_key_cache_head_axis_0"), val = int32(1)]; int32 attention_10_key_cache_head_num_splits_0 = const()[name = string("attention_10_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_10_key_cache_head_0, tensor attention_10_key_cache_head_1 = split(axis = attention_10_key_cache_head_axis_0, num_splits = attention_10_key_cache_head_num_splits_0, x = 
attention_10_key_cache)[name = string("attention_10_key_cache_head")]; tensor attention_10_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_10_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_10_updated_value_cache_0 = slice_update(begin = attention_10_update_begin_0, end = attention_10_update_end_0, squeeze_mask = attention_10_updated_value_cache_0_squeeze_mask_0, update = attention_10_split_qkv_heads_2, x = coreml_update_state_19)[name = string("attention_10_updated_value_cache_0")]; write_state(data = attention_10_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_21_write_state")]; tensor coreml_update_state_21 = read_state(input = value_cache_state)[name = string("coreml_update_state_21")]; tensor attention_10_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_10_slice_current_layer_value_cache_begin_0"), val = tensor([10, 0, 0, 0])]; tensor attention_10_slice_current_layer_value_cache_end_0 = const()[name = string("attention_10_slice_current_layer_value_cache_end_0"), val = tensor([11, 2, 512, 64])]; tensor attention_10_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_10_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_10_slice_current_layer_value_cache = slice_by_index(begin = attention_10_slice_current_layer_value_cache_begin_0, end = attention_10_slice_current_layer_value_cache_end_0, squeeze_mask = attention_10_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_21)[name = string("attention_10_slice_current_layer_value_cache")]; int32 attention_10_slice_value_cache_heads_axis_0 = const()[name = string("attention_10_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_10_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_10_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_10_slice_value_cache_heads_0, tensor attention_10_slice_value_cache_heads_1 = split(axis = attention_10_slice_value_cache_heads_axis_0, num_splits = attention_10_slice_value_cache_heads_num_splits_0, x = attention_10_slice_current_layer_value_cache)[name = string("attention_10_slice_value_cache_heads")]; bool attention_10_scores_0_transpose_y_0 = const()[name = string("attention_10_scores_0_transpose_y_0"), val = bool(true)]; bool attention_10_scores_0_transpose_x_0 = const()[name = string("attention_10_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_10_scores_0 = matmul(transpose_x = attention_10_scores_0_transpose_x_0, transpose_y = attention_10_scores_0_transpose_y_0, x = attention_10_key_cache_head_0, y = attention_10_q_splits_0)[name = string("attention_10_scores_0")]; fp16 attention_10_scaled_scores_0_y_0 = const()[name = string("attention_10_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_10_scaled_scores_0 = mul(x = attention_10_scores_0, y = attention_10_scaled_scores_0_y_0)[name = string("attention_10_scaled_scores_0")]; tensor attention_10_masked_scaled_scores_0 = add(x = attention_10_scaled_scores_0, y = transpose_0)[name = string("attention_10_masked_scaled_scores_0")]; int32 softmax_20_axis_0 = const()[name = string("softmax_20_axis_0"), val = int32(-2)]; tensor softmax_20 = softmax(axis = softmax_20_axis_0, x = attention_10_masked_scaled_scores_0)[name = string("softmax_20")]; bool attention_10_attention_0_transpose_x_0 = const()[name = 
string("attention_10_attention_0_transpose_x_0"), val = bool(true)]; bool attention_10_attention_0_transpose_y_0 = const()[name = string("attention_10_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_10_attention_0 = matmul(transpose_x = attention_10_attention_0_transpose_x_0, transpose_y = attention_10_attention_0_transpose_y_0, x = softmax_20, y = attention_10_slice_value_cache_heads_0)[name = string("attention_10_attention_0")]; bool attention_10_scores_1_transpose_y_0 = const()[name = string("attention_10_scores_1_transpose_y_0"), val = bool(true)]; bool attention_10_scores_1_transpose_x_0 = const()[name = string("attention_10_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_10_scores_1 = matmul(transpose_x = attention_10_scores_1_transpose_x_0, transpose_y = attention_10_scores_1_transpose_y_0, x = attention_10_key_cache_head_1, y = attention_10_q_splits_1)[name = string("attention_10_scores_1")]; fp16 attention_10_scaled_scores_1_y_0 = const()[name = string("attention_10_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_10_scaled_scores_1 = mul(x = attention_10_scores_1, y = attention_10_scaled_scores_1_y_0)[name = string("attention_10_scaled_scores_1")]; tensor attention_10_masked_scaled_scores_1 = add(x = attention_10_scaled_scores_1, y = transpose_0)[name = string("attention_10_masked_scaled_scores_1")]; int32 softmax_21_axis_0 = const()[name = string("softmax_21_axis_0"), val = int32(-2)]; tensor softmax_21 = softmax(axis = softmax_21_axis_0, x = attention_10_masked_scaled_scores_1)[name = string("softmax_21")]; bool attention_10_attention_1_transpose_x_0 = const()[name = string("attention_10_attention_1_transpose_x_0"), val = bool(true)]; bool attention_10_attention_1_transpose_y_0 = const()[name = string("attention_10_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_10_attention_1 = matmul(transpose_x = attention_10_attention_1_transpose_x_0, transpose_y = attention_10_attention_1_transpose_y_0, x = softmax_21, y = attention_10_slice_value_cache_heads_1)[name = string("attention_10_attention_1")]; int32 attention_10_concat_attention_all_heads_axis_0 = const()[name = string("attention_10_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_10_concat_attention_all_heads_interleave_0 = const()[name = string("attention_10_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_10_concat_attention_all_heads = concat(axis = attention_10_concat_attention_all_heads_axis_0, interleave = attention_10_concat_attention_all_heads_interleave_0, values = (attention_10_attention_0, attention_10_attention_1))[name = string("attention_10_concat_attention_all_heads")]; tensor attention_10_channels_first_retransposed_perm_0 = const()[name = string("attention_10_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_10_reshaped_shape_0 = const()[name = string("attention_10_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_10_channels_first_retransposed = transpose(perm = attention_10_channels_first_retransposed_perm_0, x = attention_10_concat_attention_all_heads)[name = string("transpose_27")]; tensor attention_10_reshaped = reshape(shape = attention_10_reshaped_shape_0, x = attention_10_channels_first_retransposed)[name = string("attention_10_reshaped")]; tensor attention_10_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408147712))), lut = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408749888))))[name = string("attention_10_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_46 = constexpr_blockwise_shift_scale(data = attention_10_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408778624))))[name = string("constexpr_blockwise_shift_scale_46")]; tensor attention_10_outproj_strides_0 = const()[name = string("attention_10_outproj_strides_0"), val = tensor([1])]; string attention_10_outproj_pad_type_0 = const()[name = string("attention_10_outproj_pad_type_0"), val = string("valid")]; tensor attention_10_outproj_pad_0 = const()[name = string("attention_10_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_10_outproj_dilations_0 = const()[name = string("attention_10_outproj_dilations_0"), val = tensor([1])]; int32 attention_10_outproj_groups_0 = const()[name = string("attention_10_outproj_groups_0"), val = int32(1)]; tensor attention_10_outproj = conv(dilations = attention_10_outproj_dilations_0, groups = attention_10_outproj_groups_0, pad = attention_10_outproj_pad_0, pad_type = attention_10_outproj_pad_type_0, strides = attention_10_outproj_strides_0, weight = constexpr_blockwise_shift_scale_46, x = attention_10_reshaped)[name = string("attention_10_outproj")]; tensor block_10_residual_1 = add(x = block_9_residual_2, y = attention_10_outproj)[name = string("block_10_residual_1")]; tensor block_10_ffn_rmsnorm_abs = abs(x = block_10_residual_1)[name = string("block_10_ffn_rmsnorm_abs")]; tensor block_10_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_10_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_10_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_10_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_10_ffn_rmsnorm_maxval = reduce_max(axes = block_10_ffn_rmsnorm_maxval_axes_0, keep_dims = block_10_ffn_rmsnorm_maxval_keep_dims_0, x = block_10_ffn_rmsnorm_abs)[name = string("block_10_ffn_rmsnorm_maxval")]; fp16 block_10_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_10_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_10_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_10_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_10_ffn_rmsnorm_maxval_clipped = clip(alpha = block_10_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_10_ffn_rmsnorm_maxval_clipped_beta_0, x = block_10_ffn_rmsnorm_maxval)[name = string("block_10_ffn_rmsnorm_maxval_clipped")]; tensor block_10_ffn_rmsnorm_scaled = real_div(x = block_10_residual_1, y = block_10_ffn_rmsnorm_maxval_clipped)[name = string("block_10_ffn_rmsnorm_scaled")]; tensor block_10_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_10_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_10_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_10_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_10_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_10_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_10_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_10_ffn_rmsnorm_scaled)[name = string("block_10_ffn_rmsnorm_squared_sum")]; fp16 block_10_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_10_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_10_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_10_ffn_rmsnorm_rsqrt_epsilon_0, x = 
block_10_ffn_rmsnorm_squared_sum)[name = string("block_10_ffn_rmsnorm_rsqrt")]; fp16 block_10_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_10_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_10_ffn_rmsnorm_dim_scaled = mul(x = block_10_ffn_rmsnorm_scaled, y = block_10_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_10_ffn_rmsnorm_dim_scaled")]; tensor block_10_ffn_rmsnorm_normalized = mul(x = block_10_ffn_rmsnorm_dim_scaled, y = block_10_ffn_rmsnorm_rsqrt)[name = string("block_10_ffn_rmsnorm_normalized")]; tensor block_10_ffn_rmsnorm_y_0 = const()[name = string("block_10_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408780480)))]; tensor block_10_ffn_rmsnorm = mul(x = block_10_ffn_rmsnorm_normalized, y = block_10_ffn_rmsnorm_y_0)[name = string("block_10_ffn_rmsnorm")]; tensor block_10_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408782336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412051008))))[name = string("block_10_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_47 = constexpr_blockwise_shift_scale(data = block_10_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412206720))))[name = string("constexpr_blockwise_shift_scale_47")]; tensor block_10_ffn_inproj_strides_0 = const()[name = string("block_10_ffn_inproj_strides_0"), val = tensor([1])]; string block_10_ffn_inproj_pad_type_0 = const()[name = string("block_10_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_10_ffn_inproj_pad_0 = const()[name = string("block_10_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_10_ffn_inproj_dilations_0 = const()[name = string("block_10_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_10_ffn_inproj_groups_0 = const()[name = string("block_10_ffn_inproj_groups_0"), val = int32(1)]; tensor block_10_ffn_inproj = conv(dilations = block_10_ffn_inproj_dilations_0, groups = block_10_ffn_inproj_groups_0, pad = block_10_ffn_inproj_pad_0, pad_type = block_10_ffn_inproj_pad_type_0, strides = block_10_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_47, x = block_10_ffn_rmsnorm)[name = string("block_10_ffn_inproj")]; tensor block_10_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412216512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415485184))))[name = string("block_10_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_48 = constexpr_blockwise_shift_scale(data = block_10_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415640896))))[name = string("constexpr_blockwise_shift_scale_48")]; tensor block_10_ffn_g_strides_0 = const()[name = string("block_10_ffn_g_strides_0"), val = tensor([1])]; string block_10_ffn_g_pad_type_0 = const()[name = string("block_10_ffn_g_pad_type_0"), val = string("valid")]; tensor block_10_ffn_g_pad_0 = const()[name = string("block_10_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_10_ffn_g_dilations_0 = const()[name = string("block_10_ffn_g_dilations_0"), val = tensor([1])]; int32 block_10_ffn_g_groups_0 = const()[name = string("block_10_ffn_g_groups_0"), val = int32(1)]; tensor 
block_10_ffn_g = conv(dilations = block_10_ffn_g_dilations_0, groups = block_10_ffn_g_groups_0, pad = block_10_ffn_g_pad_0, pad_type = block_10_ffn_g_pad_type_0, strides = block_10_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_48, x = block_10_ffn_rmsnorm)[name = string("block_10_ffn_g")]; tensor block_10_ffn_g_activation = silu(x = block_10_ffn_g)[name = string("block_10_ffn_g_activation")]; tensor block_10_ffn_x_gated = mul(x = block_10_ffn_inproj, y = block_10_ffn_g_activation)[name = string("block_10_ffn_x_gated")]; tensor block_10_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415650688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418919360))))[name = string("block_10_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_49 = constexpr_blockwise_shift_scale(data = block_10_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418948096))))[name = string("constexpr_blockwise_shift_scale_49")]; tensor block_10_ffn_outproj_strides_0 = const()[name = string("block_10_ffn_outproj_strides_0"), val = tensor([1])]; string block_10_ffn_outproj_pad_type_0 = const()[name = string("block_10_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_10_ffn_outproj_pad_0 = const()[name = string("block_10_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_10_ffn_outproj_dilations_0 = const()[name = string("block_10_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_10_ffn_outproj_groups_0 = const()[name = string("block_10_ffn_outproj_groups_0"), val = int32(1)]; tensor block_10_ffn_outproj = conv(dilations = block_10_ffn_outproj_dilations_0, groups = block_10_ffn_outproj_groups_0, pad = block_10_ffn_outproj_pad_0, pad_type = block_10_ffn_outproj_pad_type_0, strides = block_10_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_49, x = block_10_ffn_x_gated)[name = string("block_10_ffn_outproj")]; tensor block_10_residual_2 = add(x = block_10_ffn_outproj, y = block_10_residual_1)[name = string("block_10_residual_2")]; tensor block_11_attention_rmsnorm_abs = abs(x = block_10_residual_2)[name = string("block_11_attention_rmsnorm_abs")]; tensor block_11_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_11_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_11_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_11_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_11_attention_rmsnorm_maxval = reduce_max(axes = block_11_attention_rmsnorm_maxval_axes_0, keep_dims = block_11_attention_rmsnorm_maxval_keep_dims_0, x = block_11_attention_rmsnorm_abs)[name = string("block_11_attention_rmsnorm_maxval")]; fp16 block_11_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_11_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_11_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_11_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_11_attention_rmsnorm_maxval_clipped = clip(alpha = block_11_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_11_attention_rmsnorm_maxval_clipped_beta_0, x = block_11_attention_rmsnorm_maxval)[name = string("block_11_attention_rmsnorm_maxval_clipped")]; tensor block_11_attention_rmsnorm_scaled = real_div(x = block_10_residual_2, y = 
block_11_attention_rmsnorm_maxval_clipped)[name = string("block_11_attention_rmsnorm_scaled")]; tensor block_11_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_11_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_11_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_11_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_11_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_11_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_11_attention_rmsnorm_squared_sum_keep_dims_0, x = block_11_attention_rmsnorm_scaled)[name = string("block_11_attention_rmsnorm_squared_sum")]; fp16 block_11_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_11_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_11_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_11_attention_rmsnorm_rsqrt_epsilon_0, x = block_11_attention_rmsnorm_squared_sum)[name = string("block_11_attention_rmsnorm_rsqrt")]; fp16 block_11_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_11_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_11_attention_rmsnorm_dim_scaled = mul(x = block_11_attention_rmsnorm_scaled, y = block_11_attention_rmsnorm_dim_scaled_y_0)[name = string("block_11_attention_rmsnorm_dim_scaled")]; tensor block_11_attention_rmsnorm_normalized = mul(x = block_11_attention_rmsnorm_dim_scaled, y = block_11_attention_rmsnorm_rsqrt)[name = string("block_11_attention_rmsnorm_normalized")]; tensor block_11_attention_rmsnorm_y_0 = const()[name = string("block_11_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418949952)))]; tensor block_11_attention_rmsnorm = mul(x = block_11_attention_rmsnorm_normalized, y = block_11_attention_rmsnorm_y_0)[name = string("block_11_attention_rmsnorm")]; tensor attention_11_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418951808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419726016))))[name = string("attention_11_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_50 = constexpr_blockwise_shift_scale(data = attention_11_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419762944))))[name = string("constexpr_blockwise_shift_scale_50")]; tensor attention_11_qkvproj_bias_0 = const()[name = string("attention_11_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419765312)))]; tensor attention_11_qkvproj_strides_0 = const()[name = string("attention_11_qkvproj_strides_0"), val = tensor([1])]; string attention_11_qkvproj_pad_type_0 = const()[name = string("attention_11_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_11_qkvproj_pad_0 = const()[name = string("attention_11_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_11_qkvproj_dilations_0 = const()[name = string("attention_11_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_11_qkvproj_groups_0 = const()[name = string("attention_11_qkvproj_groups_0"), val = int32(1)]; tensor attention_11_qkvproj = conv(bias = attention_11_qkvproj_bias_0, dilations = attention_11_qkvproj_dilations_0, groups = attention_11_qkvproj_groups_0, pad = attention_11_qkvproj_pad_0, pad_type = attention_11_qkvproj_pad_type_0, strides = 
attention_11_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_50, x = block_11_attention_rmsnorm)[name = string("attention_11_qkvproj")]; tensor attention_11_head_reshape_shape_0 = const()[name = string("attention_11_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_11_head_reshape = reshape(shape = attention_11_head_reshape_shape_0, x = attention_11_qkvproj)[name = string("attention_11_head_reshape")]; tensor attention_11_head_transpose_perm_0 = const()[name = string("attention_11_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_11_split_qkv_heads_axis_0 = const()[name = string("attention_11_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_11_split_qkv_heads_split_sizes_0 = const()[name = string("attention_11_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_11_head_transpose = transpose(perm = attention_11_head_transpose_perm_0, x = attention_11_head_reshape)[name = string("transpose_26")]; tensor attention_11_split_qkv_heads_0, tensor attention_11_split_qkv_heads_1, tensor attention_11_split_qkv_heads_2 = split(axis = attention_11_split_qkv_heads_axis_0, split_sizes = attention_11_split_qkv_heads_split_sizes_0, x = attention_11_head_transpose)[name = string("attention_11_split_qkv_heads")]; tensor attention_11_q_rope_lhs_mult = mul(x = attention_11_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_11_q_rope_lhs_mult")]; int32 attention_11_q_rotate_half_split_num_splits_0 = const()[name = string("attention_11_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_11_q_rotate_half_split_axis_0 = const()[name = string("attention_11_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_11_q_rotate_half_split_0, tensor attention_11_q_rotate_half_split_1 = split(axis = attention_11_q_rotate_half_split_axis_0, num_splits = attention_11_q_rotate_half_split_num_splits_0, x = attention_11_split_qkv_heads_0)[name = string("attention_11_q_rotate_half_split")]; fp16 attention_11_q_rotate_half_neg_y_0 = const()[name = string("attention_11_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_11_q_rotate_half_neg = mul(x = attention_11_q_rotate_half_split_1, y = attention_11_q_rotate_half_neg_y_0)[name = string("attention_11_q_rotate_half_neg")]; int32 attention_11_q_rotate_half_concat_axis_0 = const()[name = string("attention_11_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_11_q_rotate_half_concat_interleave_0 = const()[name = string("attention_11_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_11_q_rotate_half_concat = concat(axis = attention_11_q_rotate_half_concat_axis_0, interleave = attention_11_q_rotate_half_concat_interleave_0, values = (attention_11_q_rotate_half_neg, attention_11_q_rotate_half_split_0))[name = string("attention_11_q_rotate_half_concat")]; tensor attention_11_q_rope_rhs_mult = mul(x = attention_11_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_11_q_rope_rhs_mult")]; tensor attention_11_q_rope = add(x = attention_11_q_rope_lhs_mult, y = attention_11_q_rope_rhs_mult)[name = string("attention_11_q_rope")]; tensor attention_11_k_rope_lhs_mult = mul(x = attention_11_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_11_k_rope_lhs_mult")]; int32 attention_11_k_rotate_half_split_num_splits_0 = const()[name = string("attention_11_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_11_k_rotate_half_split_axis_0 = const()[name = 
string("attention_11_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_11_k_rotate_half_split_0, tensor attention_11_k_rotate_half_split_1 = split(axis = attention_11_k_rotate_half_split_axis_0, num_splits = attention_11_k_rotate_half_split_num_splits_0, x = attention_11_split_qkv_heads_1)[name = string("attention_11_k_rotate_half_split")]; fp16 attention_11_k_rotate_half_neg_y_0 = const()[name = string("attention_11_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_11_k_rotate_half_neg = mul(x = attention_11_k_rotate_half_split_1, y = attention_11_k_rotate_half_neg_y_0)[name = string("attention_11_k_rotate_half_neg")]; int32 attention_11_k_rotate_half_concat_axis_0 = const()[name = string("attention_11_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_11_k_rotate_half_concat_interleave_0 = const()[name = string("attention_11_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_11_k_rotate_half_concat = concat(axis = attention_11_k_rotate_half_concat_axis_0, interleave = attention_11_k_rotate_half_concat_interleave_0, values = (attention_11_k_rotate_half_neg, attention_11_k_rotate_half_split_0))[name = string("attention_11_k_rotate_half_concat")]; tensor attention_11_k_rope_rhs_mult = mul(x = attention_11_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_11_k_rope_rhs_mult")]; tensor attention_11_k_rope = add(x = attention_11_k_rope_lhs_mult, y = attention_11_k_rope_rhs_mult)[name = string("attention_11_k_rope")]; int32 attention_11_q_splits_axis_0 = const()[name = string("attention_11_q_splits_axis_0"), val = int32(1)]; int32 attention_11_q_splits_num_splits_0 = const()[name = string("attention_11_q_splits_num_splits_0"), val = int32(2)]; tensor attention_11_q_splits_0, tensor attention_11_q_splits_1 = split(axis = attention_11_q_splits_axis_0, num_splits = attention_11_q_splits_num_splits_0, x = attention_11_q_rope)[name = string("attention_11_q_splits")]; tensor attention_11_update_begin_0_values0_0 = const()[name = string("attention_11_update_begin_0_values0_0"), val = tensor([11])]; tensor attention_11_update_begin_0_values1_0 = const()[name = string("attention_11_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_11_update_begin_0_values3_0 = const()[name = string("attention_11_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_11_update_begin_0_axis_0 = const()[name = string("attention_11_update_begin_0_axis_0"), val = int32(0)]; bool attention_11_update_begin_0_interleave_0 = const()[name = string("attention_11_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_11_update_begin_0 = concat(axis = attention_11_update_begin_0_axis_0, interleave = attention_11_update_begin_0_interleave_0, values = (attention_11_update_begin_0_values0_0, attention_11_update_begin_0_values1_0, query_pos1, attention_11_update_begin_0_values3_0))[name = string("attention_11_update_begin_0")]; tensor attention_11_update_end_0_values0_0 = const()[name = string("attention_11_update_end_0_values0_0"), val = tensor([12])]; tensor attention_11_update_end_0_values1_0 = const()[name = string("attention_11_update_end_0_values1_0"), val = tensor([2])]; tensor attention_11_update_end_0_values3_0 = const()[name = string("attention_11_update_end_0_values3_0"), val = tensor([64])]; int32 attention_11_update_end_0_axis_0 = const()[name = string("attention_11_update_end_0_axis_0"), val = int32(0)]; bool attention_11_update_end_0_interleave_0 = const()[name = 
string("attention_11_update_end_0_interleave_0"), val = bool(false)]; tensor attention_11_update_end_0 = concat(axis = attention_11_update_end_0_axis_0, interleave = attention_11_update_end_0_interleave_0, values = (attention_11_update_end_0_values0_0, attention_11_update_end_0_values1_0, end_pos_0, attention_11_update_end_0_values3_0))[name = string("attention_11_update_end_0")]; tensor attention_11_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_11_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_11_updated_key_cache_0 = slice_update(begin = attention_11_update_begin_0, end = attention_11_update_end_0, squeeze_mask = attention_11_updated_key_cache_0_squeeze_mask_0, update = attention_11_k_rope, x = coreml_update_state_20)[name = string("attention_11_updated_key_cache_0")]; write_state(data = attention_11_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_22_write_state")]; tensor coreml_update_state_22 = read_state(input = key_cache_state)[name = string("coreml_update_state_22")]; tensor attention_11_key_cache_begin_0 = const()[name = string("attention_11_key_cache_begin_0"), val = tensor([11, 0, 0, 0])]; tensor attention_11_key_cache_end_0 = const()[name = string("attention_11_key_cache_end_0"), val = tensor([12, 2, 512, 64])]; tensor attention_11_key_cache_squeeze_mask_0 = const()[name = string("attention_11_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_11_key_cache = slice_by_index(begin = attention_11_key_cache_begin_0, end = attention_11_key_cache_end_0, squeeze_mask = attention_11_key_cache_squeeze_mask_0, x = coreml_update_state_22)[name = string("attention_11_key_cache")]; int32 attention_11_key_cache_head_axis_0 = const()[name = string("attention_11_key_cache_head_axis_0"), val = int32(1)]; int32 attention_11_key_cache_head_num_splits_0 = const()[name = string("attention_11_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_11_key_cache_head_0, tensor attention_11_key_cache_head_1 = split(axis = attention_11_key_cache_head_axis_0, num_splits = attention_11_key_cache_head_num_splits_0, x = attention_11_key_cache)[name = string("attention_11_key_cache_head")]; tensor attention_11_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_11_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_11_updated_value_cache_0 = slice_update(begin = attention_11_update_begin_0, end = attention_11_update_end_0, squeeze_mask = attention_11_updated_value_cache_0_squeeze_mask_0, update = attention_11_split_qkv_heads_2, x = coreml_update_state_21)[name = string("attention_11_updated_value_cache_0")]; write_state(data = attention_11_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_23_write_state")]; tensor coreml_update_state_23 = read_state(input = value_cache_state)[name = string("coreml_update_state_23")]; tensor attention_11_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_11_slice_current_layer_value_cache_begin_0"), val = tensor([11, 0, 0, 0])]; tensor attention_11_slice_current_layer_value_cache_end_0 = const()[name = string("attention_11_slice_current_layer_value_cache_end_0"), val = tensor([12, 2, 512, 64])]; tensor attention_11_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_11_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; 
tensor attention_11_slice_current_layer_value_cache = slice_by_index(begin = attention_11_slice_current_layer_value_cache_begin_0, end = attention_11_slice_current_layer_value_cache_end_0, squeeze_mask = attention_11_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_23)[name = string("attention_11_slice_current_layer_value_cache")]; int32 attention_11_slice_value_cache_heads_axis_0 = const()[name = string("attention_11_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_11_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_11_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_11_slice_value_cache_heads_0, tensor attention_11_slice_value_cache_heads_1 = split(axis = attention_11_slice_value_cache_heads_axis_0, num_splits = attention_11_slice_value_cache_heads_num_splits_0, x = attention_11_slice_current_layer_value_cache)[name = string("attention_11_slice_value_cache_heads")]; bool attention_11_scores_0_transpose_y_0 = const()[name = string("attention_11_scores_0_transpose_y_0"), val = bool(true)]; bool attention_11_scores_0_transpose_x_0 = const()[name = string("attention_11_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_11_scores_0 = matmul(transpose_x = attention_11_scores_0_transpose_x_0, transpose_y = attention_11_scores_0_transpose_y_0, x = attention_11_key_cache_head_0, y = attention_11_q_splits_0)[name = string("attention_11_scores_0")]; fp16 attention_11_scaled_scores_0_y_0 = const()[name = string("attention_11_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_11_scaled_scores_0 = mul(x = attention_11_scores_0, y = attention_11_scaled_scores_0_y_0)[name = string("attention_11_scaled_scores_0")]; tensor attention_11_masked_scaled_scores_0 = add(x = attention_11_scaled_scores_0, y = transpose_0)[name = string("attention_11_masked_scaled_scores_0")]; int32 softmax_22_axis_0 = const()[name = string("softmax_22_axis_0"), val = int32(-2)]; tensor softmax_22 = softmax(axis = softmax_22_axis_0, x = attention_11_masked_scaled_scores_0)[name = string("softmax_22")]; bool attention_11_attention_0_transpose_x_0 = const()[name = string("attention_11_attention_0_transpose_x_0"), val = bool(true)]; bool attention_11_attention_0_transpose_y_0 = const()[name = string("attention_11_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_11_attention_0 = matmul(transpose_x = attention_11_attention_0_transpose_x_0, transpose_y = attention_11_attention_0_transpose_y_0, x = softmax_22, y = attention_11_slice_value_cache_heads_0)[name = string("attention_11_attention_0")]; bool attention_11_scores_1_transpose_y_0 = const()[name = string("attention_11_scores_1_transpose_y_0"), val = bool(true)]; bool attention_11_scores_1_transpose_x_0 = const()[name = string("attention_11_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_11_scores_1 = matmul(transpose_x = attention_11_scores_1_transpose_x_0, transpose_y = attention_11_scores_1_transpose_y_0, x = attention_11_key_cache_head_1, y = attention_11_q_splits_1)[name = string("attention_11_scores_1")]; fp16 attention_11_scaled_scores_1_y_0 = const()[name = string("attention_11_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_11_scaled_scores_1 = mul(x = attention_11_scores_1, y = attention_11_scaled_scores_1_y_0)[name = string("attention_11_scaled_scores_1")]; tensor attention_11_masked_scaled_scores_1 = add(x = attention_11_scaled_scores_1, y = transpose_0)[name = string("attention_11_masked_scaled_scores_1")]; int32 
softmax_23_axis_0 = const()[name = string("softmax_23_axis_0"), val = int32(-2)]; tensor softmax_23 = softmax(axis = softmax_23_axis_0, x = attention_11_masked_scaled_scores_1)[name = string("softmax_23")]; bool attention_11_attention_1_transpose_x_0 = const()[name = string("attention_11_attention_1_transpose_x_0"), val = bool(true)]; bool attention_11_attention_1_transpose_y_0 = const()[name = string("attention_11_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_11_attention_1 = matmul(transpose_x = attention_11_attention_1_transpose_x_0, transpose_y = attention_11_attention_1_transpose_y_0, x = softmax_23, y = attention_11_slice_value_cache_heads_1)[name = string("attention_11_attention_1")]; int32 attention_11_concat_attention_all_heads_axis_0 = const()[name = string("attention_11_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_11_concat_attention_all_heads_interleave_0 = const()[name = string("attention_11_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_11_concat_attention_all_heads = concat(axis = attention_11_concat_attention_all_heads_axis_0, interleave = attention_11_concat_attention_all_heads_interleave_0, values = (attention_11_attention_0, attention_11_attention_1))[name = string("attention_11_concat_attention_all_heads")]; tensor attention_11_channels_first_retransposed_perm_0 = const()[name = string("attention_11_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_11_reshaped_shape_0 = const()[name = string("attention_11_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_11_channels_first_retransposed = transpose(perm = attention_11_channels_first_retransposed_perm_0, x = attention_11_concat_attention_all_heads)[name = string("transpose_25")]; tensor attention_11_reshaped = reshape(shape = attention_11_reshaped_shape_0, x = attention_11_channels_first_retransposed)[name = string("attention_11_reshaped")]; tensor attention_11_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419767680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420369856))))[name = string("attention_11_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_51 = constexpr_blockwise_shift_scale(data = attention_11_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420398592))))[name = string("constexpr_blockwise_shift_scale_51")]; tensor attention_11_outproj_strides_0 = const()[name = string("attention_11_outproj_strides_0"), val = tensor([1])]; string attention_11_outproj_pad_type_0 = const()[name = string("attention_11_outproj_pad_type_0"), val = string("valid")]; tensor attention_11_outproj_pad_0 = const()[name = string("attention_11_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_11_outproj_dilations_0 = const()[name = string("attention_11_outproj_dilations_0"), val = tensor([1])]; int32 attention_11_outproj_groups_0 = const()[name = string("attention_11_outproj_groups_0"), val = int32(1)]; tensor attention_11_outproj = conv(dilations = attention_11_outproj_dilations_0, groups = attention_11_outproj_groups_0, pad = attention_11_outproj_pad_0, pad_type = attention_11_outproj_pad_type_0, strides = attention_11_outproj_strides_0, weight = constexpr_blockwise_shift_scale_51, x = attention_11_reshaped)[name = string("attention_11_outproj")]; tensor 
block_11_residual_1 = add(x = block_10_residual_2, y = attention_11_outproj)[name = string("block_11_residual_1")]; tensor block_11_ffn_rmsnorm_abs = abs(x = block_11_residual_1)[name = string("block_11_ffn_rmsnorm_abs")]; tensor block_11_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_11_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_11_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_11_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_11_ffn_rmsnorm_maxval = reduce_max(axes = block_11_ffn_rmsnorm_maxval_axes_0, keep_dims = block_11_ffn_rmsnorm_maxval_keep_dims_0, x = block_11_ffn_rmsnorm_abs)[name = string("block_11_ffn_rmsnorm_maxval")]; fp16 block_11_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_11_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_11_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_11_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_11_ffn_rmsnorm_maxval_clipped = clip(alpha = block_11_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_11_ffn_rmsnorm_maxval_clipped_beta_0, x = block_11_ffn_rmsnorm_maxval)[name = string("block_11_ffn_rmsnorm_maxval_clipped")]; tensor block_11_ffn_rmsnorm_scaled = real_div(x = block_11_residual_1, y = block_11_ffn_rmsnorm_maxval_clipped)[name = string("block_11_ffn_rmsnorm_scaled")]; tensor block_11_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_11_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_11_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_11_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_11_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_11_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_11_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_11_ffn_rmsnorm_scaled)[name = string("block_11_ffn_rmsnorm_squared_sum")]; fp16 block_11_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_11_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_11_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_11_ffn_rmsnorm_rsqrt_epsilon_0, x = block_11_ffn_rmsnorm_squared_sum)[name = string("block_11_ffn_rmsnorm_rsqrt")]; fp16 block_11_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_11_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_11_ffn_rmsnorm_dim_scaled = mul(x = block_11_ffn_rmsnorm_scaled, y = block_11_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_11_ffn_rmsnorm_dim_scaled")]; tensor block_11_ffn_rmsnorm_normalized = mul(x = block_11_ffn_rmsnorm_dim_scaled, y = block_11_ffn_rmsnorm_rsqrt)[name = string("block_11_ffn_rmsnorm_normalized")]; tensor block_11_ffn_rmsnorm_y_0 = const()[name = string("block_11_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420400448)))]; tensor block_11_ffn_rmsnorm = mul(x = block_11_ffn_rmsnorm_normalized, y = block_11_ffn_rmsnorm_y_0)[name = string("block_11_ffn_rmsnorm")]; tensor block_11_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420402304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423670976))))[name = string("block_11_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_52 = constexpr_blockwise_shift_scale(data = block_11_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(423826688))))[name = string("constexpr_blockwise_shift_scale_52")]; tensor block_11_ffn_inproj_strides_0 = const()[name = string("block_11_ffn_inproj_strides_0"), val = tensor([1])]; string block_11_ffn_inproj_pad_type_0 = const()[name = string("block_11_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_11_ffn_inproj_pad_0 = const()[name = string("block_11_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_11_ffn_inproj_dilations_0 = const()[name = string("block_11_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_11_ffn_inproj_groups_0 = const()[name = string("block_11_ffn_inproj_groups_0"), val = int32(1)]; tensor block_11_ffn_inproj = conv(dilations = block_11_ffn_inproj_dilations_0, groups = block_11_ffn_inproj_groups_0, pad = block_11_ffn_inproj_pad_0, pad_type = block_11_ffn_inproj_pad_type_0, strides = block_11_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_52, x = block_11_ffn_rmsnorm)[name = string("block_11_ffn_inproj")]; tensor block_11_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423836480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427105152))))[name = string("block_11_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_53 = constexpr_blockwise_shift_scale(data = block_11_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427260864))))[name = string("constexpr_blockwise_shift_scale_53")]; tensor block_11_ffn_g_strides_0 = const()[name = string("block_11_ffn_g_strides_0"), val = tensor([1])]; string block_11_ffn_g_pad_type_0 = const()[name = string("block_11_ffn_g_pad_type_0"), val = string("valid")]; tensor block_11_ffn_g_pad_0 = const()[name = string("block_11_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_11_ffn_g_dilations_0 = const()[name = string("block_11_ffn_g_dilations_0"), val = tensor([1])]; int32 block_11_ffn_g_groups_0 = const()[name = string("block_11_ffn_g_groups_0"), val = int32(1)]; tensor block_11_ffn_g = conv(dilations = block_11_ffn_g_dilations_0, groups = block_11_ffn_g_groups_0, pad = block_11_ffn_g_pad_0, pad_type = block_11_ffn_g_pad_type_0, strides = block_11_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_53, x = block_11_ffn_rmsnorm)[name = string("block_11_ffn_g")]; tensor block_11_ffn_g_activation = silu(x = block_11_ffn_g)[name = string("block_11_ffn_g_activation")]; tensor block_11_ffn_x_gated = mul(x = block_11_ffn_inproj, y = block_11_ffn_g_activation)[name = string("block_11_ffn_x_gated")]; tensor block_11_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427270656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430539328))))[name = string("block_11_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_54 = constexpr_blockwise_shift_scale(data = block_11_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430568064))))[name = string("constexpr_blockwise_shift_scale_54")]; tensor block_11_ffn_outproj_strides_0 = const()[name = string("block_11_ffn_outproj_strides_0"), val = tensor([1])]; string block_11_ffn_outproj_pad_type_0 = const()[name = string("block_11_ffn_outproj_pad_type_0"), val = string("valid")]; tensor 
block_11_ffn_outproj_pad_0 = const()[name = string("block_11_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_11_ffn_outproj_dilations_0 = const()[name = string("block_11_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_11_ffn_outproj_groups_0 = const()[name = string("block_11_ffn_outproj_groups_0"), val = int32(1)]; tensor block_11_ffn_outproj = conv(dilations = block_11_ffn_outproj_dilations_0, groups = block_11_ffn_outproj_groups_0, pad = block_11_ffn_outproj_pad_0, pad_type = block_11_ffn_outproj_pad_type_0, strides = block_11_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_54, x = block_11_ffn_x_gated)[name = string("block_11_ffn_outproj")]; tensor block_11_residual_2 = add(x = block_11_ffn_outproj, y = block_11_residual_1)[name = string("block_11_residual_2")]; tensor block_12_attention_rmsnorm_abs = abs(x = block_11_residual_2)[name = string("block_12_attention_rmsnorm_abs")]; tensor block_12_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_12_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_12_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_12_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_12_attention_rmsnorm_maxval = reduce_max(axes = block_12_attention_rmsnorm_maxval_axes_0, keep_dims = block_12_attention_rmsnorm_maxval_keep_dims_0, x = block_12_attention_rmsnorm_abs)[name = string("block_12_attention_rmsnorm_maxval")]; fp16 block_12_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_12_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_12_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_12_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_12_attention_rmsnorm_maxval_clipped = clip(alpha = block_12_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_12_attention_rmsnorm_maxval_clipped_beta_0, x = block_12_attention_rmsnorm_maxval)[name = string("block_12_attention_rmsnorm_maxval_clipped")]; tensor block_12_attention_rmsnorm_scaled = real_div(x = block_11_residual_2, y = block_12_attention_rmsnorm_maxval_clipped)[name = string("block_12_attention_rmsnorm_scaled")]; tensor block_12_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_12_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_12_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_12_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_12_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_12_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_12_attention_rmsnorm_squared_sum_keep_dims_0, x = block_12_attention_rmsnorm_scaled)[name = string("block_12_attention_rmsnorm_squared_sum")]; fp16 block_12_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_12_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_12_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_12_attention_rmsnorm_rsqrt_epsilon_0, x = block_12_attention_rmsnorm_squared_sum)[name = string("block_12_attention_rmsnorm_rsqrt")]; fp16 block_12_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_12_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_12_attention_rmsnorm_dim_scaled = mul(x = block_12_attention_rmsnorm_scaled, y = block_12_attention_rmsnorm_dim_scaled_y_0)[name = string("block_12_attention_rmsnorm_dim_scaled")]; tensor block_12_attention_rmsnorm_normalized = mul(x = 
block_12_attention_rmsnorm_dim_scaled, y = block_12_attention_rmsnorm_rsqrt)[name = string("block_12_attention_rmsnorm_normalized")]; tensor block_12_attention_rmsnorm_y_0 = const()[name = string("block_12_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430569920)))]; tensor block_12_attention_rmsnorm = mul(x = block_12_attention_rmsnorm_normalized, y = block_12_attention_rmsnorm_y_0)[name = string("block_12_attention_rmsnorm")]; tensor attention_12_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430571776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431345984))))[name = string("attention_12_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_55 = constexpr_blockwise_shift_scale(data = attention_12_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431382912))))[name = string("constexpr_blockwise_shift_scale_55")]; tensor attention_12_qkvproj_bias_0 = const()[name = string("attention_12_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431385280)))]; tensor attention_12_qkvproj_strides_0 = const()[name = string("attention_12_qkvproj_strides_0"), val = tensor([1])]; string attention_12_qkvproj_pad_type_0 = const()[name = string("attention_12_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_12_qkvproj_pad_0 = const()[name = string("attention_12_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_12_qkvproj_dilations_0 = const()[name = string("attention_12_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_12_qkvproj_groups_0 = const()[name = string("attention_12_qkvproj_groups_0"), val = int32(1)]; tensor attention_12_qkvproj = conv(bias = attention_12_qkvproj_bias_0, dilations = attention_12_qkvproj_dilations_0, groups = attention_12_qkvproj_groups_0, pad = attention_12_qkvproj_pad_0, pad_type = attention_12_qkvproj_pad_type_0, strides = attention_12_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_55, x = block_12_attention_rmsnorm)[name = string("attention_12_qkvproj")]; tensor attention_12_head_reshape_shape_0 = const()[name = string("attention_12_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_12_head_reshape = reshape(shape = attention_12_head_reshape_shape_0, x = attention_12_qkvproj)[name = string("attention_12_head_reshape")]; tensor attention_12_head_transpose_perm_0 = const()[name = string("attention_12_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_12_split_qkv_heads_axis_0 = const()[name = string("attention_12_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_12_split_qkv_heads_split_sizes_0 = const()[name = string("attention_12_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_12_head_transpose = transpose(perm = attention_12_head_transpose_perm_0, x = attention_12_head_reshape)[name = string("transpose_24")]; tensor attention_12_split_qkv_heads_0, tensor attention_12_split_qkv_heads_1, tensor attention_12_split_qkv_heads_2 = split(axis = attention_12_split_qkv_heads_axis_0, split_sizes = attention_12_split_qkv_heads_split_sizes_0, x = attention_12_head_transpose)[name = string("attention_12_split_qkv_heads")]; tensor attention_12_q_rope_lhs_mult = mul(x = attention_12_split_qkv_heads_0, y = query_cos_emb)[name 
= string("attention_12_q_rope_lhs_mult")]; int32 attention_12_q_rotate_half_split_num_splits_0 = const()[name = string("attention_12_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_12_q_rotate_half_split_axis_0 = const()[name = string("attention_12_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_12_q_rotate_half_split_0, tensor attention_12_q_rotate_half_split_1 = split(axis = attention_12_q_rotate_half_split_axis_0, num_splits = attention_12_q_rotate_half_split_num_splits_0, x = attention_12_split_qkv_heads_0)[name = string("attention_12_q_rotate_half_split")]; fp16 attention_12_q_rotate_half_neg_y_0 = const()[name = string("attention_12_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_12_q_rotate_half_neg = mul(x = attention_12_q_rotate_half_split_1, y = attention_12_q_rotate_half_neg_y_0)[name = string("attention_12_q_rotate_half_neg")]; int32 attention_12_q_rotate_half_concat_axis_0 = const()[name = string("attention_12_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_12_q_rotate_half_concat_interleave_0 = const()[name = string("attention_12_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_12_q_rotate_half_concat = concat(axis = attention_12_q_rotate_half_concat_axis_0, interleave = attention_12_q_rotate_half_concat_interleave_0, values = (attention_12_q_rotate_half_neg, attention_12_q_rotate_half_split_0))[name = string("attention_12_q_rotate_half_concat")]; tensor attention_12_q_rope_rhs_mult = mul(x = attention_12_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_12_q_rope_rhs_mult")]; tensor attention_12_q_rope = add(x = attention_12_q_rope_lhs_mult, y = attention_12_q_rope_rhs_mult)[name = string("attention_12_q_rope")]; tensor attention_12_k_rope_lhs_mult = mul(x = attention_12_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_12_k_rope_lhs_mult")]; int32 attention_12_k_rotate_half_split_num_splits_0 = const()[name = string("attention_12_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_12_k_rotate_half_split_axis_0 = const()[name = string("attention_12_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_12_k_rotate_half_split_0, tensor attention_12_k_rotate_half_split_1 = split(axis = attention_12_k_rotate_half_split_axis_0, num_splits = attention_12_k_rotate_half_split_num_splits_0, x = attention_12_split_qkv_heads_1)[name = string("attention_12_k_rotate_half_split")]; fp16 attention_12_k_rotate_half_neg_y_0 = const()[name = string("attention_12_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_12_k_rotate_half_neg = mul(x = attention_12_k_rotate_half_split_1, y = attention_12_k_rotate_half_neg_y_0)[name = string("attention_12_k_rotate_half_neg")]; int32 attention_12_k_rotate_half_concat_axis_0 = const()[name = string("attention_12_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_12_k_rotate_half_concat_interleave_0 = const()[name = string("attention_12_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_12_k_rotate_half_concat = concat(axis = attention_12_k_rotate_half_concat_axis_0, interleave = attention_12_k_rotate_half_concat_interleave_0, values = (attention_12_k_rotate_half_neg, attention_12_k_rotate_half_split_0))[name = string("attention_12_k_rotate_half_concat")]; tensor attention_12_k_rope_rhs_mult = mul(x = attention_12_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_12_k_rope_rhs_mult")]; tensor attention_12_k_rope = add(x = 
attention_12_k_rope_lhs_mult, y = attention_12_k_rope_rhs_mult)[name = string("attention_12_k_rope")]; int32 attention_12_q_splits_axis_0 = const()[name = string("attention_12_q_splits_axis_0"), val = int32(1)]; int32 attention_12_q_splits_num_splits_0 = const()[name = string("attention_12_q_splits_num_splits_0"), val = int32(2)]; tensor attention_12_q_splits_0, tensor attention_12_q_splits_1 = split(axis = attention_12_q_splits_axis_0, num_splits = attention_12_q_splits_num_splits_0, x = attention_12_q_rope)[name = string("attention_12_q_splits")]; tensor attention_12_update_begin_0_values0_0 = const()[name = string("attention_12_update_begin_0_values0_0"), val = tensor([12])]; tensor attention_12_update_begin_0_values1_0 = const()[name = string("attention_12_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_12_update_begin_0_values3_0 = const()[name = string("attention_12_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_12_update_begin_0_axis_0 = const()[name = string("attention_12_update_begin_0_axis_0"), val = int32(0)]; bool attention_12_update_begin_0_interleave_0 = const()[name = string("attention_12_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_12_update_begin_0 = concat(axis = attention_12_update_begin_0_axis_0, interleave = attention_12_update_begin_0_interleave_0, values = (attention_12_update_begin_0_values0_0, attention_12_update_begin_0_values1_0, query_pos1, attention_12_update_begin_0_values3_0))[name = string("attention_12_update_begin_0")]; tensor attention_12_update_end_0_values0_0 = const()[name = string("attention_12_update_end_0_values0_0"), val = tensor([13])]; tensor attention_12_update_end_0_values1_0 = const()[name = string("attention_12_update_end_0_values1_0"), val = tensor([2])]; tensor attention_12_update_end_0_values3_0 = const()[name = string("attention_12_update_end_0_values3_0"), val = tensor([64])]; int32 attention_12_update_end_0_axis_0 = const()[name = string("attention_12_update_end_0_axis_0"), val = int32(0)]; bool attention_12_update_end_0_interleave_0 = const()[name = string("attention_12_update_end_0_interleave_0"), val = bool(false)]; tensor attention_12_update_end_0 = concat(axis = attention_12_update_end_0_axis_0, interleave = attention_12_update_end_0_interleave_0, values = (attention_12_update_end_0_values0_0, attention_12_update_end_0_values1_0, end_pos_0, attention_12_update_end_0_values3_0))[name = string("attention_12_update_end_0")]; tensor attention_12_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_12_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_12_updated_key_cache_0 = slice_update(begin = attention_12_update_begin_0, end = attention_12_update_end_0, squeeze_mask = attention_12_updated_key_cache_0_squeeze_mask_0, update = attention_12_k_rope, x = coreml_update_state_22)[name = string("attention_12_updated_key_cache_0")]; write_state(data = attention_12_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_24_write_state")]; tensor coreml_update_state_24 = read_state(input = key_cache_state)[name = string("coreml_update_state_24")]; tensor attention_12_key_cache_begin_0 = const()[name = string("attention_12_key_cache_begin_0"), val = tensor([12, 0, 0, 0])]; tensor attention_12_key_cache_end_0 = const()[name = string("attention_12_key_cache_end_0"), val = tensor([13, 2, 512, 64])]; tensor attention_12_key_cache_squeeze_mask_0 = const()[name = 
string("attention_12_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_12_key_cache = slice_by_index(begin = attention_12_key_cache_begin_0, end = attention_12_key_cache_end_0, squeeze_mask = attention_12_key_cache_squeeze_mask_0, x = coreml_update_state_24)[name = string("attention_12_key_cache")]; int32 attention_12_key_cache_head_axis_0 = const()[name = string("attention_12_key_cache_head_axis_0"), val = int32(1)]; int32 attention_12_key_cache_head_num_splits_0 = const()[name = string("attention_12_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_12_key_cache_head_0, tensor attention_12_key_cache_head_1 = split(axis = attention_12_key_cache_head_axis_0, num_splits = attention_12_key_cache_head_num_splits_0, x = attention_12_key_cache)[name = string("attention_12_key_cache_head")]; tensor attention_12_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_12_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_12_updated_value_cache_0 = slice_update(begin = attention_12_update_begin_0, end = attention_12_update_end_0, squeeze_mask = attention_12_updated_value_cache_0_squeeze_mask_0, update = attention_12_split_qkv_heads_2, x = coreml_update_state_23)[name = string("attention_12_updated_value_cache_0")]; write_state(data = attention_12_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_25_write_state")]; tensor coreml_update_state_25 = read_state(input = value_cache_state)[name = string("coreml_update_state_25")]; tensor attention_12_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_12_slice_current_layer_value_cache_begin_0"), val = tensor([12, 0, 0, 0])]; tensor attention_12_slice_current_layer_value_cache_end_0 = const()[name = string("attention_12_slice_current_layer_value_cache_end_0"), val = tensor([13, 2, 512, 64])]; tensor attention_12_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_12_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_12_slice_current_layer_value_cache = slice_by_index(begin = attention_12_slice_current_layer_value_cache_begin_0, end = attention_12_slice_current_layer_value_cache_end_0, squeeze_mask = attention_12_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_25)[name = string("attention_12_slice_current_layer_value_cache")]; int32 attention_12_slice_value_cache_heads_axis_0 = const()[name = string("attention_12_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_12_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_12_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_12_slice_value_cache_heads_0, tensor attention_12_slice_value_cache_heads_1 = split(axis = attention_12_slice_value_cache_heads_axis_0, num_splits = attention_12_slice_value_cache_heads_num_splits_0, x = attention_12_slice_current_layer_value_cache)[name = string("attention_12_slice_value_cache_heads")]; bool attention_12_scores_0_transpose_y_0 = const()[name = string("attention_12_scores_0_transpose_y_0"), val = bool(true)]; bool attention_12_scores_0_transpose_x_0 = const()[name = string("attention_12_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_12_scores_0 = matmul(transpose_x = attention_12_scores_0_transpose_x_0, transpose_y = attention_12_scores_0_transpose_y_0, x = attention_12_key_cache_head_0, y = 
attention_12_q_splits_0)[name = string("attention_12_scores_0")]; fp16 attention_12_scaled_scores_0_y_0 = const()[name = string("attention_12_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_12_scaled_scores_0 = mul(x = attention_12_scores_0, y = attention_12_scaled_scores_0_y_0)[name = string("attention_12_scaled_scores_0")]; tensor attention_12_masked_scaled_scores_0 = add(x = attention_12_scaled_scores_0, y = transpose_0)[name = string("attention_12_masked_scaled_scores_0")]; int32 softmax_24_axis_0 = const()[name = string("softmax_24_axis_0"), val = int32(-2)]; tensor softmax_24 = softmax(axis = softmax_24_axis_0, x = attention_12_masked_scaled_scores_0)[name = string("softmax_24")]; bool attention_12_attention_0_transpose_x_0 = const()[name = string("attention_12_attention_0_transpose_x_0"), val = bool(true)]; bool attention_12_attention_0_transpose_y_0 = const()[name = string("attention_12_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_12_attention_0 = matmul(transpose_x = attention_12_attention_0_transpose_x_0, transpose_y = attention_12_attention_0_transpose_y_0, x = softmax_24, y = attention_12_slice_value_cache_heads_0)[name = string("attention_12_attention_0")]; bool attention_12_scores_1_transpose_y_0 = const()[name = string("attention_12_scores_1_transpose_y_0"), val = bool(true)]; bool attention_12_scores_1_transpose_x_0 = const()[name = string("attention_12_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_12_scores_1 = matmul(transpose_x = attention_12_scores_1_transpose_x_0, transpose_y = attention_12_scores_1_transpose_y_0, x = attention_12_key_cache_head_1, y = attention_12_q_splits_1)[name = string("attention_12_scores_1")]; fp16 attention_12_scaled_scores_1_y_0 = const()[name = string("attention_12_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_12_scaled_scores_1 = mul(x = attention_12_scores_1, y = attention_12_scaled_scores_1_y_0)[name = string("attention_12_scaled_scores_1")]; tensor attention_12_masked_scaled_scores_1 = add(x = attention_12_scaled_scores_1, y = transpose_0)[name = string("attention_12_masked_scaled_scores_1")]; int32 softmax_25_axis_0 = const()[name = string("softmax_25_axis_0"), val = int32(-2)]; tensor softmax_25 = softmax(axis = softmax_25_axis_0, x = attention_12_masked_scaled_scores_1)[name = string("softmax_25")]; bool attention_12_attention_1_transpose_x_0 = const()[name = string("attention_12_attention_1_transpose_x_0"), val = bool(true)]; bool attention_12_attention_1_transpose_y_0 = const()[name = string("attention_12_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_12_attention_1 = matmul(transpose_x = attention_12_attention_1_transpose_x_0, transpose_y = attention_12_attention_1_transpose_y_0, x = softmax_25, y = attention_12_slice_value_cache_heads_1)[name = string("attention_12_attention_1")]; int32 attention_12_concat_attention_all_heads_axis_0 = const()[name = string("attention_12_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_12_concat_attention_all_heads_interleave_0 = const()[name = string("attention_12_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_12_concat_attention_all_heads = concat(axis = attention_12_concat_attention_all_heads_axis_0, interleave = attention_12_concat_attention_all_heads_interleave_0, values = (attention_12_attention_0, attention_12_attention_1))[name = string("attention_12_concat_attention_all_heads")]; tensor attention_12_channels_first_retransposed_perm_0 = const()[name = 
string("attention_12_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_12_reshaped_shape_0 = const()[name = string("attention_12_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_12_channels_first_retransposed = transpose(perm = attention_12_channels_first_retransposed_perm_0, x = attention_12_concat_attention_all_heads)[name = string("transpose_23")]; tensor attention_12_reshaped = reshape(shape = attention_12_reshaped_shape_0, x = attention_12_channels_first_retransposed)[name = string("attention_12_reshaped")]; tensor attention_12_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431387648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431989824))))[name = string("attention_12_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_56 = constexpr_blockwise_shift_scale(data = attention_12_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432018560))))[name = string("constexpr_blockwise_shift_scale_56")]; tensor attention_12_outproj_strides_0 = const()[name = string("attention_12_outproj_strides_0"), val = tensor([1])]; string attention_12_outproj_pad_type_0 = const()[name = string("attention_12_outproj_pad_type_0"), val = string("valid")]; tensor attention_12_outproj_pad_0 = const()[name = string("attention_12_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_12_outproj_dilations_0 = const()[name = string("attention_12_outproj_dilations_0"), val = tensor([1])]; int32 attention_12_outproj_groups_0 = const()[name = string("attention_12_outproj_groups_0"), val = int32(1)]; tensor attention_12_outproj = conv(dilations = attention_12_outproj_dilations_0, groups = attention_12_outproj_groups_0, pad = attention_12_outproj_pad_0, pad_type = attention_12_outproj_pad_type_0, strides = attention_12_outproj_strides_0, weight = constexpr_blockwise_shift_scale_56, x = attention_12_reshaped)[name = string("attention_12_outproj")]; tensor block_12_residual_1 = add(x = block_11_residual_2, y = attention_12_outproj)[name = string("block_12_residual_1")]; tensor block_12_ffn_rmsnorm_abs = abs(x = block_12_residual_1)[name = string("block_12_ffn_rmsnorm_abs")]; tensor block_12_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_12_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_12_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_12_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_12_ffn_rmsnorm_maxval = reduce_max(axes = block_12_ffn_rmsnorm_maxval_axes_0, keep_dims = block_12_ffn_rmsnorm_maxval_keep_dims_0, x = block_12_ffn_rmsnorm_abs)[name = string("block_12_ffn_rmsnorm_maxval")]; fp16 block_12_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_12_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_12_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_12_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_12_ffn_rmsnorm_maxval_clipped = clip(alpha = block_12_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_12_ffn_rmsnorm_maxval_clipped_beta_0, x = block_12_ffn_rmsnorm_maxval)[name = string("block_12_ffn_rmsnorm_maxval_clipped")]; tensor block_12_ffn_rmsnorm_scaled = real_div(x = block_12_residual_1, y = block_12_ffn_rmsnorm_maxval_clipped)[name = string("block_12_ffn_rmsnorm_scaled")]; tensor 
block_12_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_12_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_12_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_12_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_12_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_12_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_12_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_12_ffn_rmsnorm_scaled)[name = string("block_12_ffn_rmsnorm_squared_sum")]; fp16 block_12_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_12_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_12_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_12_ffn_rmsnorm_rsqrt_epsilon_0, x = block_12_ffn_rmsnorm_squared_sum)[name = string("block_12_ffn_rmsnorm_rsqrt")]; fp16 block_12_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_12_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_12_ffn_rmsnorm_dim_scaled = mul(x = block_12_ffn_rmsnorm_scaled, y = block_12_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_12_ffn_rmsnorm_dim_scaled")]; tensor block_12_ffn_rmsnorm_normalized = mul(x = block_12_ffn_rmsnorm_dim_scaled, y = block_12_ffn_rmsnorm_rsqrt)[name = string("block_12_ffn_rmsnorm_normalized")]; tensor block_12_ffn_rmsnorm_y_0 = const()[name = string("block_12_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432020416)))]; tensor block_12_ffn_rmsnorm = mul(x = block_12_ffn_rmsnorm_normalized, y = block_12_ffn_rmsnorm_y_0)[name = string("block_12_ffn_rmsnorm")]; tensor block_12_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432022272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(435290944))))[name = string("block_12_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_57 = constexpr_blockwise_shift_scale(data = block_12_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(435446656))))[name = string("constexpr_blockwise_shift_scale_57")]; tensor block_12_ffn_inproj_strides_0 = const()[name = string("block_12_ffn_inproj_strides_0"), val = tensor([1])]; string block_12_ffn_inproj_pad_type_0 = const()[name = string("block_12_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_12_ffn_inproj_pad_0 = const()[name = string("block_12_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_12_ffn_inproj_dilations_0 = const()[name = string("block_12_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_12_ffn_inproj_groups_0 = const()[name = string("block_12_ffn_inproj_groups_0"), val = int32(1)]; tensor block_12_ffn_inproj = conv(dilations = block_12_ffn_inproj_dilations_0, groups = block_12_ffn_inproj_groups_0, pad = block_12_ffn_inproj_pad_0, pad_type = block_12_ffn_inproj_pad_type_0, strides = block_12_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_57, x = block_12_ffn_rmsnorm)[name = string("block_12_ffn_inproj")]; tensor block_12_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(435456448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438725120))))[name = string("block_12_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_58 = 
constexpr_blockwise_shift_scale(data = block_12_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438880832))))[name = string("constexpr_blockwise_shift_scale_58")]; tensor block_12_ffn_g_strides_0 = const()[name = string("block_12_ffn_g_strides_0"), val = tensor([1])]; string block_12_ffn_g_pad_type_0 = const()[name = string("block_12_ffn_g_pad_type_0"), val = string("valid")]; tensor block_12_ffn_g_pad_0 = const()[name = string("block_12_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_12_ffn_g_dilations_0 = const()[name = string("block_12_ffn_g_dilations_0"), val = tensor([1])]; int32 block_12_ffn_g_groups_0 = const()[name = string("block_12_ffn_g_groups_0"), val = int32(1)]; tensor block_12_ffn_g = conv(dilations = block_12_ffn_g_dilations_0, groups = block_12_ffn_g_groups_0, pad = block_12_ffn_g_pad_0, pad_type = block_12_ffn_g_pad_type_0, strides = block_12_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_58, x = block_12_ffn_rmsnorm)[name = string("block_12_ffn_g")]; tensor block_12_ffn_g_activation = silu(x = block_12_ffn_g)[name = string("block_12_ffn_g_activation")]; tensor block_12_ffn_x_gated = mul(x = block_12_ffn_inproj, y = block_12_ffn_g_activation)[name = string("block_12_ffn_x_gated")]; tensor block_12_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438890624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442159296))))[name = string("block_12_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_59 = constexpr_blockwise_shift_scale(data = block_12_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442188032))))[name = string("constexpr_blockwise_shift_scale_59")]; tensor block_12_ffn_outproj_strides_0 = const()[name = string("block_12_ffn_outproj_strides_0"), val = tensor([1])]; string block_12_ffn_outproj_pad_type_0 = const()[name = string("block_12_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_12_ffn_outproj_pad_0 = const()[name = string("block_12_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_12_ffn_outproj_dilations_0 = const()[name = string("block_12_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_12_ffn_outproj_groups_0 = const()[name = string("block_12_ffn_outproj_groups_0"), val = int32(1)]; tensor block_12_ffn_outproj = conv(dilations = block_12_ffn_outproj_dilations_0, groups = block_12_ffn_outproj_groups_0, pad = block_12_ffn_outproj_pad_0, pad_type = block_12_ffn_outproj_pad_type_0, strides = block_12_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_59, x = block_12_ffn_x_gated)[name = string("block_12_ffn_outproj")]; tensor block_12_residual_2 = add(x = block_12_ffn_outproj, y = block_12_residual_1)[name = string("block_12_residual_2")]; tensor block_13_attention_rmsnorm_abs = abs(x = block_12_residual_2)[name = string("block_13_attention_rmsnorm_abs")]; tensor block_13_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_13_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_13_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_13_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_13_attention_rmsnorm_maxval = reduce_max(axes = block_13_attention_rmsnorm_maxval_axes_0, keep_dims = block_13_attention_rmsnorm_maxval_keep_dims_0, x = 
block_13_attention_rmsnorm_abs)[name = string("block_13_attention_rmsnorm_maxval")]; fp16 block_13_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_13_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_13_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_13_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_13_attention_rmsnorm_maxval_clipped = clip(alpha = block_13_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_13_attention_rmsnorm_maxval_clipped_beta_0, x = block_13_attention_rmsnorm_maxval)[name = string("block_13_attention_rmsnorm_maxval_clipped")]; tensor block_13_attention_rmsnorm_scaled = real_div(x = block_12_residual_2, y = block_13_attention_rmsnorm_maxval_clipped)[name = string("block_13_attention_rmsnorm_scaled")]; tensor block_13_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_13_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_13_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_13_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_13_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_13_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_13_attention_rmsnorm_squared_sum_keep_dims_0, x = block_13_attention_rmsnorm_scaled)[name = string("block_13_attention_rmsnorm_squared_sum")]; fp16 block_13_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_13_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_13_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_13_attention_rmsnorm_rsqrt_epsilon_0, x = block_13_attention_rmsnorm_squared_sum)[name = string("block_13_attention_rmsnorm_rsqrt")]; fp16 block_13_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_13_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_13_attention_rmsnorm_dim_scaled = mul(x = block_13_attention_rmsnorm_scaled, y = block_13_attention_rmsnorm_dim_scaled_y_0)[name = string("block_13_attention_rmsnorm_dim_scaled")]; tensor block_13_attention_rmsnorm_normalized = mul(x = block_13_attention_rmsnorm_dim_scaled, y = block_13_attention_rmsnorm_rsqrt)[name = string("block_13_attention_rmsnorm_normalized")]; tensor block_13_attention_rmsnorm_y_0 = const()[name = string("block_13_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442189888)))]; tensor block_13_attention_rmsnorm = mul(x = block_13_attention_rmsnorm_normalized, y = block_13_attention_rmsnorm_y_0)[name = string("block_13_attention_rmsnorm")]; tensor attention_13_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442191744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442965952))))[name = string("attention_13_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_60 = constexpr_blockwise_shift_scale(data = attention_13_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443002880))))[name = string("constexpr_blockwise_shift_scale_60")]; tensor attention_13_qkvproj_bias_0 = const()[name = string("attention_13_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443005248)))]; tensor attention_13_qkvproj_strides_0 = const()[name = 
string("attention_13_qkvproj_strides_0"), val = tensor([1])]; string attention_13_qkvproj_pad_type_0 = const()[name = string("attention_13_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_13_qkvproj_pad_0 = const()[name = string("attention_13_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_13_qkvproj_dilations_0 = const()[name = string("attention_13_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_13_qkvproj_groups_0 = const()[name = string("attention_13_qkvproj_groups_0"), val = int32(1)]; tensor attention_13_qkvproj = conv(bias = attention_13_qkvproj_bias_0, dilations = attention_13_qkvproj_dilations_0, groups = attention_13_qkvproj_groups_0, pad = attention_13_qkvproj_pad_0, pad_type = attention_13_qkvproj_pad_type_0, strides = attention_13_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_60, x = block_13_attention_rmsnorm)[name = string("attention_13_qkvproj")]; tensor attention_13_head_reshape_shape_0 = const()[name = string("attention_13_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_13_head_reshape = reshape(shape = attention_13_head_reshape_shape_0, x = attention_13_qkvproj)[name = string("attention_13_head_reshape")]; tensor attention_13_head_transpose_perm_0 = const()[name = string("attention_13_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_13_split_qkv_heads_axis_0 = const()[name = string("attention_13_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_13_split_qkv_heads_split_sizes_0 = const()[name = string("attention_13_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_13_head_transpose = transpose(perm = attention_13_head_transpose_perm_0, x = attention_13_head_reshape)[name = string("transpose_22")]; tensor attention_13_split_qkv_heads_0, tensor attention_13_split_qkv_heads_1, tensor attention_13_split_qkv_heads_2 = split(axis = attention_13_split_qkv_heads_axis_0, split_sizes = attention_13_split_qkv_heads_split_sizes_0, x = attention_13_head_transpose)[name = string("attention_13_split_qkv_heads")]; tensor attention_13_q_rope_lhs_mult = mul(x = attention_13_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_13_q_rope_lhs_mult")]; int32 attention_13_q_rotate_half_split_num_splits_0 = const()[name = string("attention_13_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_13_q_rotate_half_split_axis_0 = const()[name = string("attention_13_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_13_q_rotate_half_split_0, tensor attention_13_q_rotate_half_split_1 = split(axis = attention_13_q_rotate_half_split_axis_0, num_splits = attention_13_q_rotate_half_split_num_splits_0, x = attention_13_split_qkv_heads_0)[name = string("attention_13_q_rotate_half_split")]; fp16 attention_13_q_rotate_half_neg_y_0 = const()[name = string("attention_13_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_13_q_rotate_half_neg = mul(x = attention_13_q_rotate_half_split_1, y = attention_13_q_rotate_half_neg_y_0)[name = string("attention_13_q_rotate_half_neg")]; int32 attention_13_q_rotate_half_concat_axis_0 = const()[name = string("attention_13_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_13_q_rotate_half_concat_interleave_0 = const()[name = string("attention_13_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_13_q_rotate_half_concat = concat(axis = attention_13_q_rotate_half_concat_axis_0, interleave = attention_13_q_rotate_half_concat_interleave_0, values = 
(attention_13_q_rotate_half_neg, attention_13_q_rotate_half_split_0))[name = string("attention_13_q_rotate_half_concat")]; tensor attention_13_q_rope_rhs_mult = mul(x = attention_13_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_13_q_rope_rhs_mult")]; tensor attention_13_q_rope = add(x = attention_13_q_rope_lhs_mult, y = attention_13_q_rope_rhs_mult)[name = string("attention_13_q_rope")]; tensor attention_13_k_rope_lhs_mult = mul(x = attention_13_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_13_k_rope_lhs_mult")]; int32 attention_13_k_rotate_half_split_num_splits_0 = const()[name = string("attention_13_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_13_k_rotate_half_split_axis_0 = const()[name = string("attention_13_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_13_k_rotate_half_split_0, tensor attention_13_k_rotate_half_split_1 = split(axis = attention_13_k_rotate_half_split_axis_0, num_splits = attention_13_k_rotate_half_split_num_splits_0, x = attention_13_split_qkv_heads_1)[name = string("attention_13_k_rotate_half_split")]; fp16 attention_13_k_rotate_half_neg_y_0 = const()[name = string("attention_13_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_13_k_rotate_half_neg = mul(x = attention_13_k_rotate_half_split_1, y = attention_13_k_rotate_half_neg_y_0)[name = string("attention_13_k_rotate_half_neg")]; int32 attention_13_k_rotate_half_concat_axis_0 = const()[name = string("attention_13_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_13_k_rotate_half_concat_interleave_0 = const()[name = string("attention_13_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_13_k_rotate_half_concat = concat(axis = attention_13_k_rotate_half_concat_axis_0, interleave = attention_13_k_rotate_half_concat_interleave_0, values = (attention_13_k_rotate_half_neg, attention_13_k_rotate_half_split_0))[name = string("attention_13_k_rotate_half_concat")]; tensor attention_13_k_rope_rhs_mult = mul(x = attention_13_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_13_k_rope_rhs_mult")]; tensor attention_13_k_rope = add(x = attention_13_k_rope_lhs_mult, y = attention_13_k_rope_rhs_mult)[name = string("attention_13_k_rope")]; int32 attention_13_q_splits_axis_0 = const()[name = string("attention_13_q_splits_axis_0"), val = int32(1)]; int32 attention_13_q_splits_num_splits_0 = const()[name = string("attention_13_q_splits_num_splits_0"), val = int32(2)]; tensor attention_13_q_splits_0, tensor attention_13_q_splits_1 = split(axis = attention_13_q_splits_axis_0, num_splits = attention_13_q_splits_num_splits_0, x = attention_13_q_rope)[name = string("attention_13_q_splits")]; tensor attention_13_update_begin_0_values0_0 = const()[name = string("attention_13_update_begin_0_values0_0"), val = tensor([13])]; tensor attention_13_update_begin_0_values1_0 = const()[name = string("attention_13_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_13_update_begin_0_values3_0 = const()[name = string("attention_13_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_13_update_begin_0_axis_0 = const()[name = string("attention_13_update_begin_0_axis_0"), val = int32(0)]; bool attention_13_update_begin_0_interleave_0 = const()[name = string("attention_13_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_13_update_begin_0 = concat(axis = attention_13_update_begin_0_axis_0, interleave = attention_13_update_begin_0_interleave_0, values = 
(attention_13_update_begin_0_values0_0, attention_13_update_begin_0_values1_0, query_pos1, attention_13_update_begin_0_values3_0))[name = string("attention_13_update_begin_0")]; tensor attention_13_update_end_0_values0_0 = const()[name = string("attention_13_update_end_0_values0_0"), val = tensor([14])]; tensor attention_13_update_end_0_values1_0 = const()[name = string("attention_13_update_end_0_values1_0"), val = tensor([2])]; tensor attention_13_update_end_0_values3_0 = const()[name = string("attention_13_update_end_0_values3_0"), val = tensor([64])]; int32 attention_13_update_end_0_axis_0 = const()[name = string("attention_13_update_end_0_axis_0"), val = int32(0)]; bool attention_13_update_end_0_interleave_0 = const()[name = string("attention_13_update_end_0_interleave_0"), val = bool(false)]; tensor attention_13_update_end_0 = concat(axis = attention_13_update_end_0_axis_0, interleave = attention_13_update_end_0_interleave_0, values = (attention_13_update_end_0_values0_0, attention_13_update_end_0_values1_0, end_pos_0, attention_13_update_end_0_values3_0))[name = string("attention_13_update_end_0")]; tensor attention_13_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_13_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_13_updated_key_cache_0 = slice_update(begin = attention_13_update_begin_0, end = attention_13_update_end_0, squeeze_mask = attention_13_updated_key_cache_0_squeeze_mask_0, update = attention_13_k_rope, x = coreml_update_state_24)[name = string("attention_13_updated_key_cache_0")]; write_state(data = attention_13_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_26_write_state")]; tensor coreml_update_state_26 = read_state(input = key_cache_state)[name = string("coreml_update_state_26")]; tensor attention_13_key_cache_begin_0 = const()[name = string("attention_13_key_cache_begin_0"), val = tensor([13, 0, 0, 0])]; tensor attention_13_key_cache_end_0 = const()[name = string("attention_13_key_cache_end_0"), val = tensor([14, 2, 512, 64])]; tensor attention_13_key_cache_squeeze_mask_0 = const()[name = string("attention_13_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_13_key_cache = slice_by_index(begin = attention_13_key_cache_begin_0, end = attention_13_key_cache_end_0, squeeze_mask = attention_13_key_cache_squeeze_mask_0, x = coreml_update_state_26)[name = string("attention_13_key_cache")]; int32 attention_13_key_cache_head_axis_0 = const()[name = string("attention_13_key_cache_head_axis_0"), val = int32(1)]; int32 attention_13_key_cache_head_num_splits_0 = const()[name = string("attention_13_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_13_key_cache_head_0, tensor attention_13_key_cache_head_1 = split(axis = attention_13_key_cache_head_axis_0, num_splits = attention_13_key_cache_head_num_splits_0, x = attention_13_key_cache)[name = string("attention_13_key_cache_head")]; tensor attention_13_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_13_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_13_updated_value_cache_0 = slice_update(begin = attention_13_update_begin_0, end = attention_13_update_end_0, squeeze_mask = attention_13_updated_value_cache_0_squeeze_mask_0, update = attention_13_split_qkv_heads_2, x = coreml_update_state_25)[name = string("attention_13_updated_value_cache_0")]; write_state(data = 
attention_13_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_27_write_state")]; tensor coreml_update_state_27 = read_state(input = value_cache_state)[name = string("coreml_update_state_27")]; tensor attention_13_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_13_slice_current_layer_value_cache_begin_0"), val = tensor([13, 0, 0, 0])]; tensor attention_13_slice_current_layer_value_cache_end_0 = const()[name = string("attention_13_slice_current_layer_value_cache_end_0"), val = tensor([14, 2, 512, 64])]; tensor attention_13_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_13_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_13_slice_current_layer_value_cache = slice_by_index(begin = attention_13_slice_current_layer_value_cache_begin_0, end = attention_13_slice_current_layer_value_cache_end_0, squeeze_mask = attention_13_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_27)[name = string("attention_13_slice_current_layer_value_cache")]; int32 attention_13_slice_value_cache_heads_axis_0 = const()[name = string("attention_13_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_13_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_13_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_13_slice_value_cache_heads_0, tensor attention_13_slice_value_cache_heads_1 = split(axis = attention_13_slice_value_cache_heads_axis_0, num_splits = attention_13_slice_value_cache_heads_num_splits_0, x = attention_13_slice_current_layer_value_cache)[name = string("attention_13_slice_value_cache_heads")]; bool attention_13_scores_0_transpose_y_0 = const()[name = string("attention_13_scores_0_transpose_y_0"), val = bool(true)]; bool attention_13_scores_0_transpose_x_0 = const()[name = string("attention_13_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_13_scores_0 = matmul(transpose_x = attention_13_scores_0_transpose_x_0, transpose_y = attention_13_scores_0_transpose_y_0, x = attention_13_key_cache_head_0, y = attention_13_q_splits_0)[name = string("attention_13_scores_0")]; fp16 attention_13_scaled_scores_0_y_0 = const()[name = string("attention_13_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_13_scaled_scores_0 = mul(x = attention_13_scores_0, y = attention_13_scaled_scores_0_y_0)[name = string("attention_13_scaled_scores_0")]; tensor attention_13_masked_scaled_scores_0 = add(x = attention_13_scaled_scores_0, y = transpose_0)[name = string("attention_13_masked_scaled_scores_0")]; int32 softmax_26_axis_0 = const()[name = string("softmax_26_axis_0"), val = int32(-2)]; tensor softmax_26 = softmax(axis = softmax_26_axis_0, x = attention_13_masked_scaled_scores_0)[name = string("softmax_26")]; bool attention_13_attention_0_transpose_x_0 = const()[name = string("attention_13_attention_0_transpose_x_0"), val = bool(true)]; bool attention_13_attention_0_transpose_y_0 = const()[name = string("attention_13_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_13_attention_0 = matmul(transpose_x = attention_13_attention_0_transpose_x_0, transpose_y = attention_13_attention_0_transpose_y_0, x = softmax_26, y = attention_13_slice_value_cache_heads_0)[name = string("attention_13_attention_0")]; bool attention_13_scores_1_transpose_y_0 = const()[name = string("attention_13_scores_1_transpose_y_0"), val = bool(true)]; bool 
attention_13_scores_1_transpose_x_0 = const()[name = string("attention_13_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_13_scores_1 = matmul(transpose_x = attention_13_scores_1_transpose_x_0, transpose_y = attention_13_scores_1_transpose_y_0, x = attention_13_key_cache_head_1, y = attention_13_q_splits_1)[name = string("attention_13_scores_1")]; fp16 attention_13_scaled_scores_1_y_0 = const()[name = string("attention_13_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_13_scaled_scores_1 = mul(x = attention_13_scores_1, y = attention_13_scaled_scores_1_y_0)[name = string("attention_13_scaled_scores_1")]; tensor attention_13_masked_scaled_scores_1 = add(x = attention_13_scaled_scores_1, y = transpose_0)[name = string("attention_13_masked_scaled_scores_1")]; int32 softmax_27_axis_0 = const()[name = string("softmax_27_axis_0"), val = int32(-2)]; tensor softmax_27 = softmax(axis = softmax_27_axis_0, x = attention_13_masked_scaled_scores_1)[name = string("softmax_27")]; bool attention_13_attention_1_transpose_x_0 = const()[name = string("attention_13_attention_1_transpose_x_0"), val = bool(true)]; bool attention_13_attention_1_transpose_y_0 = const()[name = string("attention_13_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_13_attention_1 = matmul(transpose_x = attention_13_attention_1_transpose_x_0, transpose_y = attention_13_attention_1_transpose_y_0, x = softmax_27, y = attention_13_slice_value_cache_heads_1)[name = string("attention_13_attention_1")]; int32 attention_13_concat_attention_all_heads_axis_0 = const()[name = string("attention_13_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_13_concat_attention_all_heads_interleave_0 = const()[name = string("attention_13_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_13_concat_attention_all_heads = concat(axis = attention_13_concat_attention_all_heads_axis_0, interleave = attention_13_concat_attention_all_heads_interleave_0, values = (attention_13_attention_0, attention_13_attention_1))[name = string("attention_13_concat_attention_all_heads")]; tensor attention_13_channels_first_retransposed_perm_0 = const()[name = string("attention_13_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_13_reshaped_shape_0 = const()[name = string("attention_13_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_13_channels_first_retransposed = transpose(perm = attention_13_channels_first_retransposed_perm_0, x = attention_13_concat_attention_all_heads)[name = string("transpose_21")]; tensor attention_13_reshaped = reshape(shape = attention_13_reshaped_shape_0, x = attention_13_channels_first_retransposed)[name = string("attention_13_reshaped")]; tensor attention_13_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443007616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443609792))))[name = string("attention_13_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_61 = constexpr_blockwise_shift_scale(data = attention_13_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443638528))))[name = string("constexpr_blockwise_shift_scale_61")]; tensor attention_13_outproj_strides_0 = const()[name = string("attention_13_outproj_strides_0"), val = tensor([1])]; string attention_13_outproj_pad_type_0 = 
const()[name = string("attention_13_outproj_pad_type_0"), val = string("valid")]; tensor attention_13_outproj_pad_0 = const()[name = string("attention_13_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_13_outproj_dilations_0 = const()[name = string("attention_13_outproj_dilations_0"), val = tensor([1])]; int32 attention_13_outproj_groups_0 = const()[name = string("attention_13_outproj_groups_0"), val = int32(1)]; tensor attention_13_outproj = conv(dilations = attention_13_outproj_dilations_0, groups = attention_13_outproj_groups_0, pad = attention_13_outproj_pad_0, pad_type = attention_13_outproj_pad_type_0, strides = attention_13_outproj_strides_0, weight = constexpr_blockwise_shift_scale_61, x = attention_13_reshaped)[name = string("attention_13_outproj")]; tensor block_13_residual_1 = add(x = block_12_residual_2, y = attention_13_outproj)[name = string("block_13_residual_1")]; tensor block_13_ffn_rmsnorm_abs = abs(x = block_13_residual_1)[name = string("block_13_ffn_rmsnorm_abs")]; tensor block_13_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_13_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_13_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_13_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_13_ffn_rmsnorm_maxval = reduce_max(axes = block_13_ffn_rmsnorm_maxval_axes_0, keep_dims = block_13_ffn_rmsnorm_maxval_keep_dims_0, x = block_13_ffn_rmsnorm_abs)[name = string("block_13_ffn_rmsnorm_maxval")]; fp16 block_13_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_13_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_13_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_13_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_13_ffn_rmsnorm_maxval_clipped = clip(alpha = block_13_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_13_ffn_rmsnorm_maxval_clipped_beta_0, x = block_13_ffn_rmsnorm_maxval)[name = string("block_13_ffn_rmsnorm_maxval_clipped")]; tensor block_13_ffn_rmsnorm_scaled = real_div(x = block_13_residual_1, y = block_13_ffn_rmsnorm_maxval_clipped)[name = string("block_13_ffn_rmsnorm_scaled")]; tensor block_13_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_13_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_13_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_13_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_13_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_13_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_13_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_13_ffn_rmsnorm_scaled)[name = string("block_13_ffn_rmsnorm_squared_sum")]; fp16 block_13_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_13_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_13_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_13_ffn_rmsnorm_rsqrt_epsilon_0, x = block_13_ffn_rmsnorm_squared_sum)[name = string("block_13_ffn_rmsnorm_rsqrt")]; fp16 block_13_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_13_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_13_ffn_rmsnorm_dim_scaled = mul(x = block_13_ffn_rmsnorm_scaled, y = block_13_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_13_ffn_rmsnorm_dim_scaled")]; tensor block_13_ffn_rmsnorm_normalized = mul(x = block_13_ffn_rmsnorm_dim_scaled, y = block_13_ffn_rmsnorm_rsqrt)[name = string("block_13_ffn_rmsnorm_normalized")]; tensor block_13_ffn_rmsnorm_y_0 = const()[name = string("block_13_ffn_rmsnorm_y_0"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443640384)))]; tensor block_13_ffn_rmsnorm = mul(x = block_13_ffn_rmsnorm_normalized, y = block_13_ffn_rmsnorm_y_0)[name = string("block_13_ffn_rmsnorm")]; tensor block_13_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443642240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446910912))))[name = string("block_13_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_62 = constexpr_blockwise_shift_scale(data = block_13_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447066624))))[name = string("constexpr_blockwise_shift_scale_62")]; tensor block_13_ffn_inproj_strides_0 = const()[name = string("block_13_ffn_inproj_strides_0"), val = tensor([1])]; string block_13_ffn_inproj_pad_type_0 = const()[name = string("block_13_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_13_ffn_inproj_pad_0 = const()[name = string("block_13_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_13_ffn_inproj_dilations_0 = const()[name = string("block_13_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_13_ffn_inproj_groups_0 = const()[name = string("block_13_ffn_inproj_groups_0"), val = int32(1)]; tensor block_13_ffn_inproj = conv(dilations = block_13_ffn_inproj_dilations_0, groups = block_13_ffn_inproj_groups_0, pad = block_13_ffn_inproj_pad_0, pad_type = block_13_ffn_inproj_pad_type_0, strides = block_13_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_62, x = block_13_ffn_rmsnorm)[name = string("block_13_ffn_inproj")]; tensor block_13_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447076416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(450345088))))[name = string("block_13_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_63 = constexpr_blockwise_shift_scale(data = block_13_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(450500800))))[name = string("constexpr_blockwise_shift_scale_63")]; tensor block_13_ffn_g_strides_0 = const()[name = string("block_13_ffn_g_strides_0"), val = tensor([1])]; string block_13_ffn_g_pad_type_0 = const()[name = string("block_13_ffn_g_pad_type_0"), val = string("valid")]; tensor block_13_ffn_g_pad_0 = const()[name = string("block_13_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_13_ffn_g_dilations_0 = const()[name = string("block_13_ffn_g_dilations_0"), val = tensor([1])]; int32 block_13_ffn_g_groups_0 = const()[name = string("block_13_ffn_g_groups_0"), val = int32(1)]; tensor block_13_ffn_g = conv(dilations = block_13_ffn_g_dilations_0, groups = block_13_ffn_g_groups_0, pad = block_13_ffn_g_pad_0, pad_type = block_13_ffn_g_pad_type_0, strides = block_13_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_63, x = block_13_ffn_rmsnorm)[name = string("block_13_ffn_g")]; tensor block_13_ffn_g_activation = silu(x = block_13_ffn_g)[name = string("block_13_ffn_g_activation")]; tensor block_13_ffn_x_gated = mul(x = block_13_ffn_inproj, y = block_13_ffn_g_activation)[name = string("block_13_ffn_x_gated")]; tensor block_13_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(450510592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453779264))))[name = string("block_13_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_64 = constexpr_blockwise_shift_scale(data = block_13_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453808000))))[name = string("constexpr_blockwise_shift_scale_64")]; tensor block_13_ffn_outproj_strides_0 = const()[name = string("block_13_ffn_outproj_strides_0"), val = tensor([1])]; string block_13_ffn_outproj_pad_type_0 = const()[name = string("block_13_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_13_ffn_outproj_pad_0 = const()[name = string("block_13_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_13_ffn_outproj_dilations_0 = const()[name = string("block_13_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_13_ffn_outproj_groups_0 = const()[name = string("block_13_ffn_outproj_groups_0"), val = int32(1)]; tensor block_13_ffn_outproj = conv(dilations = block_13_ffn_outproj_dilations_0, groups = block_13_ffn_outproj_groups_0, pad = block_13_ffn_outproj_pad_0, pad_type = block_13_ffn_outproj_pad_type_0, strides = block_13_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_64, x = block_13_ffn_x_gated)[name = string("block_13_ffn_outproj")]; tensor block_13_residual_2 = add(x = block_13_ffn_outproj, y = block_13_residual_1)[name = string("block_13_residual_2")]; tensor block_14_attention_rmsnorm_abs = abs(x = block_13_residual_2)[name = string("block_14_attention_rmsnorm_abs")]; tensor block_14_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_14_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_14_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_14_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_14_attention_rmsnorm_maxval = reduce_max(axes = block_14_attention_rmsnorm_maxval_axes_0, keep_dims = block_14_attention_rmsnorm_maxval_keep_dims_0, x = block_14_attention_rmsnorm_abs)[name = string("block_14_attention_rmsnorm_maxval")]; fp16 block_14_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_14_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_14_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_14_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_14_attention_rmsnorm_maxval_clipped = clip(alpha = block_14_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_14_attention_rmsnorm_maxval_clipped_beta_0, x = block_14_attention_rmsnorm_maxval)[name = string("block_14_attention_rmsnorm_maxval_clipped")]; tensor block_14_attention_rmsnorm_scaled = real_div(x = block_13_residual_2, y = block_14_attention_rmsnorm_maxval_clipped)[name = string("block_14_attention_rmsnorm_scaled")]; tensor block_14_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_14_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_14_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_14_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_14_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_14_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_14_attention_rmsnorm_squared_sum_keep_dims_0, x = block_14_attention_rmsnorm_scaled)[name = 
string("block_14_attention_rmsnorm_squared_sum")]; fp16 block_14_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_14_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_14_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_14_attention_rmsnorm_rsqrt_epsilon_0, x = block_14_attention_rmsnorm_squared_sum)[name = string("block_14_attention_rmsnorm_rsqrt")]; fp16 block_14_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_14_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_14_attention_rmsnorm_dim_scaled = mul(x = block_14_attention_rmsnorm_scaled, y = block_14_attention_rmsnorm_dim_scaled_y_0)[name = string("block_14_attention_rmsnorm_dim_scaled")]; tensor block_14_attention_rmsnorm_normalized = mul(x = block_14_attention_rmsnorm_dim_scaled, y = block_14_attention_rmsnorm_rsqrt)[name = string("block_14_attention_rmsnorm_normalized")]; tensor block_14_attention_rmsnorm_y_0 = const()[name = string("block_14_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453809856)))]; tensor block_14_attention_rmsnorm = mul(x = block_14_attention_rmsnorm_normalized, y = block_14_attention_rmsnorm_y_0)[name = string("block_14_attention_rmsnorm")]; tensor attention_14_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453811712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454585920))))[name = string("attention_14_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_65 = constexpr_blockwise_shift_scale(data = attention_14_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454622848))))[name = string("constexpr_blockwise_shift_scale_65")]; tensor attention_14_qkvproj_bias_0 = const()[name = string("attention_14_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454625216)))]; tensor attention_14_qkvproj_strides_0 = const()[name = string("attention_14_qkvproj_strides_0"), val = tensor([1])]; string attention_14_qkvproj_pad_type_0 = const()[name = string("attention_14_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_14_qkvproj_pad_0 = const()[name = string("attention_14_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_14_qkvproj_dilations_0 = const()[name = string("attention_14_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_14_qkvproj_groups_0 = const()[name = string("attention_14_qkvproj_groups_0"), val = int32(1)]; tensor attention_14_qkvproj = conv(bias = attention_14_qkvproj_bias_0, dilations = attention_14_qkvproj_dilations_0, groups = attention_14_qkvproj_groups_0, pad = attention_14_qkvproj_pad_0, pad_type = attention_14_qkvproj_pad_type_0, strides = attention_14_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_65, x = block_14_attention_rmsnorm)[name = string("attention_14_qkvproj")]; tensor attention_14_head_reshape_shape_0 = const()[name = string("attention_14_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_14_head_reshape = reshape(shape = attention_14_head_reshape_shape_0, x = attention_14_qkvproj)[name = string("attention_14_head_reshape")]; tensor attention_14_head_transpose_perm_0 = const()[name = string("attention_14_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_14_split_qkv_heads_axis_0 = 
const()[name = string("attention_14_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_14_split_qkv_heads_split_sizes_0 = const()[name = string("attention_14_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_14_head_transpose = transpose(perm = attention_14_head_transpose_perm_0, x = attention_14_head_reshape)[name = string("transpose_20")]; tensor attention_14_split_qkv_heads_0, tensor attention_14_split_qkv_heads_1, tensor attention_14_split_qkv_heads_2 = split(axis = attention_14_split_qkv_heads_axis_0, split_sizes = attention_14_split_qkv_heads_split_sizes_0, x = attention_14_head_transpose)[name = string("attention_14_split_qkv_heads")]; tensor attention_14_q_rope_lhs_mult = mul(x = attention_14_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_14_q_rope_lhs_mult")]; int32 attention_14_q_rotate_half_split_num_splits_0 = const()[name = string("attention_14_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_14_q_rotate_half_split_axis_0 = const()[name = string("attention_14_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_14_q_rotate_half_split_0, tensor attention_14_q_rotate_half_split_1 = split(axis = attention_14_q_rotate_half_split_axis_0, num_splits = attention_14_q_rotate_half_split_num_splits_0, x = attention_14_split_qkv_heads_0)[name = string("attention_14_q_rotate_half_split")]; fp16 attention_14_q_rotate_half_neg_y_0 = const()[name = string("attention_14_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_14_q_rotate_half_neg = mul(x = attention_14_q_rotate_half_split_1, y = attention_14_q_rotate_half_neg_y_0)[name = string("attention_14_q_rotate_half_neg")]; int32 attention_14_q_rotate_half_concat_axis_0 = const()[name = string("attention_14_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_14_q_rotate_half_concat_interleave_0 = const()[name = string("attention_14_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_14_q_rotate_half_concat = concat(axis = attention_14_q_rotate_half_concat_axis_0, interleave = attention_14_q_rotate_half_concat_interleave_0, values = (attention_14_q_rotate_half_neg, attention_14_q_rotate_half_split_0))[name = string("attention_14_q_rotate_half_concat")]; tensor attention_14_q_rope_rhs_mult = mul(x = attention_14_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_14_q_rope_rhs_mult")]; tensor attention_14_q_rope = add(x = attention_14_q_rope_lhs_mult, y = attention_14_q_rope_rhs_mult)[name = string("attention_14_q_rope")]; tensor attention_14_k_rope_lhs_mult = mul(x = attention_14_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_14_k_rope_lhs_mult")]; int32 attention_14_k_rotate_half_split_num_splits_0 = const()[name = string("attention_14_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_14_k_rotate_half_split_axis_0 = const()[name = string("attention_14_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_14_k_rotate_half_split_0, tensor attention_14_k_rotate_half_split_1 = split(axis = attention_14_k_rotate_half_split_axis_0, num_splits = attention_14_k_rotate_half_split_num_splits_0, x = attention_14_split_qkv_heads_1)[name = string("attention_14_k_rotate_half_split")]; fp16 attention_14_k_rotate_half_neg_y_0 = const()[name = string("attention_14_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_14_k_rotate_half_neg = mul(x = attention_14_k_rotate_half_split_1, y = attention_14_k_rotate_half_neg_y_0)[name = 
string("attention_14_k_rotate_half_neg")]; int32 attention_14_k_rotate_half_concat_axis_0 = const()[name = string("attention_14_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_14_k_rotate_half_concat_interleave_0 = const()[name = string("attention_14_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_14_k_rotate_half_concat = concat(axis = attention_14_k_rotate_half_concat_axis_0, interleave = attention_14_k_rotate_half_concat_interleave_0, values = (attention_14_k_rotate_half_neg, attention_14_k_rotate_half_split_0))[name = string("attention_14_k_rotate_half_concat")]; tensor attention_14_k_rope_rhs_mult = mul(x = attention_14_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_14_k_rope_rhs_mult")]; tensor attention_14_k_rope = add(x = attention_14_k_rope_lhs_mult, y = attention_14_k_rope_rhs_mult)[name = string("attention_14_k_rope")]; int32 attention_14_q_splits_axis_0 = const()[name = string("attention_14_q_splits_axis_0"), val = int32(1)]; int32 attention_14_q_splits_num_splits_0 = const()[name = string("attention_14_q_splits_num_splits_0"), val = int32(2)]; tensor attention_14_q_splits_0, tensor attention_14_q_splits_1 = split(axis = attention_14_q_splits_axis_0, num_splits = attention_14_q_splits_num_splits_0, x = attention_14_q_rope)[name = string("attention_14_q_splits")]; tensor attention_14_update_begin_0_values0_0 = const()[name = string("attention_14_update_begin_0_values0_0"), val = tensor([14])]; tensor attention_14_update_begin_0_values1_0 = const()[name = string("attention_14_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_14_update_begin_0_values3_0 = const()[name = string("attention_14_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_14_update_begin_0_axis_0 = const()[name = string("attention_14_update_begin_0_axis_0"), val = int32(0)]; bool attention_14_update_begin_0_interleave_0 = const()[name = string("attention_14_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_14_update_begin_0 = concat(axis = attention_14_update_begin_0_axis_0, interleave = attention_14_update_begin_0_interleave_0, values = (attention_14_update_begin_0_values0_0, attention_14_update_begin_0_values1_0, query_pos1, attention_14_update_begin_0_values3_0))[name = string("attention_14_update_begin_0")]; tensor attention_14_update_end_0_values0_0 = const()[name = string("attention_14_update_end_0_values0_0"), val = tensor([15])]; tensor attention_14_update_end_0_values1_0 = const()[name = string("attention_14_update_end_0_values1_0"), val = tensor([2])]; tensor attention_14_update_end_0_values3_0 = const()[name = string("attention_14_update_end_0_values3_0"), val = tensor([64])]; int32 attention_14_update_end_0_axis_0 = const()[name = string("attention_14_update_end_0_axis_0"), val = int32(0)]; bool attention_14_update_end_0_interleave_0 = const()[name = string("attention_14_update_end_0_interleave_0"), val = bool(false)]; tensor attention_14_update_end_0 = concat(axis = attention_14_update_end_0_axis_0, interleave = attention_14_update_end_0_interleave_0, values = (attention_14_update_end_0_values0_0, attention_14_update_end_0_values1_0, end_pos_0, attention_14_update_end_0_values3_0))[name = string("attention_14_update_end_0")]; tensor attention_14_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_14_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_14_updated_key_cache_0 = slice_update(begin = attention_14_update_begin_0, end = 
attention_14_update_end_0, squeeze_mask = attention_14_updated_key_cache_0_squeeze_mask_0, update = attention_14_k_rope, x = coreml_update_state_26)[name = string("attention_14_updated_key_cache_0")]; write_state(data = attention_14_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_28_write_state")]; tensor coreml_update_state_28 = read_state(input = key_cache_state)[name = string("coreml_update_state_28")]; tensor attention_14_key_cache_begin_0 = const()[name = string("attention_14_key_cache_begin_0"), val = tensor([14, 0, 0, 0])]; tensor attention_14_key_cache_end_0 = const()[name = string("attention_14_key_cache_end_0"), val = tensor([15, 2, 512, 64])]; tensor attention_14_key_cache_squeeze_mask_0 = const()[name = string("attention_14_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_14_key_cache = slice_by_index(begin = attention_14_key_cache_begin_0, end = attention_14_key_cache_end_0, squeeze_mask = attention_14_key_cache_squeeze_mask_0, x = coreml_update_state_28)[name = string("attention_14_key_cache")]; int32 attention_14_key_cache_head_axis_0 = const()[name = string("attention_14_key_cache_head_axis_0"), val = int32(1)]; int32 attention_14_key_cache_head_num_splits_0 = const()[name = string("attention_14_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_14_key_cache_head_0, tensor attention_14_key_cache_head_1 = split(axis = attention_14_key_cache_head_axis_0, num_splits = attention_14_key_cache_head_num_splits_0, x = attention_14_key_cache)[name = string("attention_14_key_cache_head")]; tensor attention_14_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_14_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_14_updated_value_cache_0 = slice_update(begin = attention_14_update_begin_0, end = attention_14_update_end_0, squeeze_mask = attention_14_updated_value_cache_0_squeeze_mask_0, update = attention_14_split_qkv_heads_2, x = coreml_update_state_27)[name = string("attention_14_updated_value_cache_0")]; write_state(data = attention_14_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_29_write_state")]; tensor coreml_update_state_29 = read_state(input = value_cache_state)[name = string("coreml_update_state_29")]; tensor attention_14_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_14_slice_current_layer_value_cache_begin_0"), val = tensor([14, 0, 0, 0])]; tensor attention_14_slice_current_layer_value_cache_end_0 = const()[name = string("attention_14_slice_current_layer_value_cache_end_0"), val = tensor([15, 2, 512, 64])]; tensor attention_14_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_14_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_14_slice_current_layer_value_cache = slice_by_index(begin = attention_14_slice_current_layer_value_cache_begin_0, end = attention_14_slice_current_layer_value_cache_end_0, squeeze_mask = attention_14_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_29)[name = string("attention_14_slice_current_layer_value_cache")]; int32 attention_14_slice_value_cache_heads_axis_0 = const()[name = string("attention_14_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_14_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_14_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor 
attention_14_slice_value_cache_heads_0, tensor attention_14_slice_value_cache_heads_1 = split(axis = attention_14_slice_value_cache_heads_axis_0, num_splits = attention_14_slice_value_cache_heads_num_splits_0, x = attention_14_slice_current_layer_value_cache)[name = string("attention_14_slice_value_cache_heads")]; bool attention_14_scores_0_transpose_y_0 = const()[name = string("attention_14_scores_0_transpose_y_0"), val = bool(true)]; bool attention_14_scores_0_transpose_x_0 = const()[name = string("attention_14_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_14_scores_0 = matmul(transpose_x = attention_14_scores_0_transpose_x_0, transpose_y = attention_14_scores_0_transpose_y_0, x = attention_14_key_cache_head_0, y = attention_14_q_splits_0)[name = string("attention_14_scores_0")]; fp16 attention_14_scaled_scores_0_y_0 = const()[name = string("attention_14_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_14_scaled_scores_0 = mul(x = attention_14_scores_0, y = attention_14_scaled_scores_0_y_0)[name = string("attention_14_scaled_scores_0")]; tensor attention_14_masked_scaled_scores_0 = add(x = attention_14_scaled_scores_0, y = transpose_0)[name = string("attention_14_masked_scaled_scores_0")]; int32 softmax_28_axis_0 = const()[name = string("softmax_28_axis_0"), val = int32(-2)]; tensor softmax_28 = softmax(axis = softmax_28_axis_0, x = attention_14_masked_scaled_scores_0)[name = string("softmax_28")]; bool attention_14_attention_0_transpose_x_0 = const()[name = string("attention_14_attention_0_transpose_x_0"), val = bool(true)]; bool attention_14_attention_0_transpose_y_0 = const()[name = string("attention_14_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_14_attention_0 = matmul(transpose_x = attention_14_attention_0_transpose_x_0, transpose_y = attention_14_attention_0_transpose_y_0, x = softmax_28, y = attention_14_slice_value_cache_heads_0)[name = string("attention_14_attention_0")]; bool attention_14_scores_1_transpose_y_0 = const()[name = string("attention_14_scores_1_transpose_y_0"), val = bool(true)]; bool attention_14_scores_1_transpose_x_0 = const()[name = string("attention_14_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_14_scores_1 = matmul(transpose_x = attention_14_scores_1_transpose_x_0, transpose_y = attention_14_scores_1_transpose_y_0, x = attention_14_key_cache_head_1, y = attention_14_q_splits_1)[name = string("attention_14_scores_1")]; fp16 attention_14_scaled_scores_1_y_0 = const()[name = string("attention_14_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_14_scaled_scores_1 = mul(x = attention_14_scores_1, y = attention_14_scaled_scores_1_y_0)[name = string("attention_14_scaled_scores_1")]; tensor attention_14_masked_scaled_scores_1 = add(x = attention_14_scaled_scores_1, y = transpose_0)[name = string("attention_14_masked_scaled_scores_1")]; int32 softmax_29_axis_0 = const()[name = string("softmax_29_axis_0"), val = int32(-2)]; tensor softmax_29 = softmax(axis = softmax_29_axis_0, x = attention_14_masked_scaled_scores_1)[name = string("softmax_29")]; bool attention_14_attention_1_transpose_x_0 = const()[name = string("attention_14_attention_1_transpose_x_0"), val = bool(true)]; bool attention_14_attention_1_transpose_y_0 = const()[name = string("attention_14_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_14_attention_1 = matmul(transpose_x = attention_14_attention_1_transpose_x_0, transpose_y = attention_14_attention_1_transpose_y_0, x = softmax_29, y = 
attention_14_slice_value_cache_heads_1)[name = string("attention_14_attention_1")]; int32 attention_14_concat_attention_all_heads_axis_0 = const()[name = string("attention_14_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_14_concat_attention_all_heads_interleave_0 = const()[name = string("attention_14_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_14_concat_attention_all_heads = concat(axis = attention_14_concat_attention_all_heads_axis_0, interleave = attention_14_concat_attention_all_heads_interleave_0, values = (attention_14_attention_0, attention_14_attention_1))[name = string("attention_14_concat_attention_all_heads")]; tensor attention_14_channels_first_retransposed_perm_0 = const()[name = string("attention_14_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_14_reshaped_shape_0 = const()[name = string("attention_14_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_14_channels_first_retransposed = transpose(perm = attention_14_channels_first_retransposed_perm_0, x = attention_14_concat_attention_all_heads)[name = string("transpose_19")]; tensor attention_14_reshaped = reshape(shape = attention_14_reshaped_shape_0, x = attention_14_channels_first_retransposed)[name = string("attention_14_reshaped")]; tensor attention_14_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454627584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455229760))))[name = string("attention_14_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_66 = constexpr_blockwise_shift_scale(data = attention_14_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455258496))))[name = string("constexpr_blockwise_shift_scale_66")]; tensor attention_14_outproj_strides_0 = const()[name = string("attention_14_outproj_strides_0"), val = tensor([1])]; string attention_14_outproj_pad_type_0 = const()[name = string("attention_14_outproj_pad_type_0"), val = string("valid")]; tensor attention_14_outproj_pad_0 = const()[name = string("attention_14_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_14_outproj_dilations_0 = const()[name = string("attention_14_outproj_dilations_0"), val = tensor([1])]; int32 attention_14_outproj_groups_0 = const()[name = string("attention_14_outproj_groups_0"), val = int32(1)]; tensor attention_14_outproj = conv(dilations = attention_14_outproj_dilations_0, groups = attention_14_outproj_groups_0, pad = attention_14_outproj_pad_0, pad_type = attention_14_outproj_pad_type_0, strides = attention_14_outproj_strides_0, weight = constexpr_blockwise_shift_scale_66, x = attention_14_reshaped)[name = string("attention_14_outproj")]; tensor block_14_residual_1 = add(x = block_13_residual_2, y = attention_14_outproj)[name = string("block_14_residual_1")]; tensor block_14_ffn_rmsnorm_abs = abs(x = block_14_residual_1)[name = string("block_14_ffn_rmsnorm_abs")]; tensor block_14_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_14_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_14_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_14_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_14_ffn_rmsnorm_maxval = reduce_max(axes = block_14_ffn_rmsnorm_maxval_axes_0, keep_dims = block_14_ffn_rmsnorm_maxval_keep_dims_0, x = 
block_14_ffn_rmsnorm_abs)[name = string("block_14_ffn_rmsnorm_maxval")]; fp16 block_14_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_14_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_14_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_14_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_14_ffn_rmsnorm_maxval_clipped = clip(alpha = block_14_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_14_ffn_rmsnorm_maxval_clipped_beta_0, x = block_14_ffn_rmsnorm_maxval)[name = string("block_14_ffn_rmsnorm_maxval_clipped")]; tensor block_14_ffn_rmsnorm_scaled = real_div(x = block_14_residual_1, y = block_14_ffn_rmsnorm_maxval_clipped)[name = string("block_14_ffn_rmsnorm_scaled")]; tensor block_14_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_14_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_14_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_14_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_14_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_14_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_14_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_14_ffn_rmsnorm_scaled)[name = string("block_14_ffn_rmsnorm_squared_sum")]; fp16 block_14_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_14_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_14_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_14_ffn_rmsnorm_rsqrt_epsilon_0, x = block_14_ffn_rmsnorm_squared_sum)[name = string("block_14_ffn_rmsnorm_rsqrt")]; fp16 block_14_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_14_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_14_ffn_rmsnorm_dim_scaled = mul(x = block_14_ffn_rmsnorm_scaled, y = block_14_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_14_ffn_rmsnorm_dim_scaled")]; tensor block_14_ffn_rmsnorm_normalized = mul(x = block_14_ffn_rmsnorm_dim_scaled, y = block_14_ffn_rmsnorm_rsqrt)[name = string("block_14_ffn_rmsnorm_normalized")]; tensor block_14_ffn_rmsnorm_y_0 = const()[name = string("block_14_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455260352)))]; tensor block_14_ffn_rmsnorm = mul(x = block_14_ffn_rmsnorm_normalized, y = block_14_ffn_rmsnorm_y_0)[name = string("block_14_ffn_rmsnorm")]; tensor block_14_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455262208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458530880))))[name = string("block_14_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_67 = constexpr_blockwise_shift_scale(data = block_14_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458686592))))[name = string("constexpr_blockwise_shift_scale_67")]; tensor block_14_ffn_inproj_strides_0 = const()[name = string("block_14_ffn_inproj_strides_0"), val = tensor([1])]; string block_14_ffn_inproj_pad_type_0 = const()[name = string("block_14_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_14_ffn_inproj_pad_0 = const()[name = string("block_14_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_14_ffn_inproj_dilations_0 = const()[name = string("block_14_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_14_ffn_inproj_groups_0 = const()[name = 
string("block_14_ffn_inproj_groups_0"), val = int32(1)]; tensor block_14_ffn_inproj = conv(dilations = block_14_ffn_inproj_dilations_0, groups = block_14_ffn_inproj_groups_0, pad = block_14_ffn_inproj_pad_0, pad_type = block_14_ffn_inproj_pad_type_0, strides = block_14_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_67, x = block_14_ffn_rmsnorm)[name = string("block_14_ffn_inproj")]; tensor block_14_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458696384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461965056))))[name = string("block_14_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_68 = constexpr_blockwise_shift_scale(data = block_14_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462120768))))[name = string("constexpr_blockwise_shift_scale_68")]; tensor block_14_ffn_g_strides_0 = const()[name = string("block_14_ffn_g_strides_0"), val = tensor([1])]; string block_14_ffn_g_pad_type_0 = const()[name = string("block_14_ffn_g_pad_type_0"), val = string("valid")]; tensor block_14_ffn_g_pad_0 = const()[name = string("block_14_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_14_ffn_g_dilations_0 = const()[name = string("block_14_ffn_g_dilations_0"), val = tensor([1])]; int32 block_14_ffn_g_groups_0 = const()[name = string("block_14_ffn_g_groups_0"), val = int32(1)]; tensor block_14_ffn_g = conv(dilations = block_14_ffn_g_dilations_0, groups = block_14_ffn_g_groups_0, pad = block_14_ffn_g_pad_0, pad_type = block_14_ffn_g_pad_type_0, strides = block_14_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_68, x = block_14_ffn_rmsnorm)[name = string("block_14_ffn_g")]; tensor block_14_ffn_g_activation = silu(x = block_14_ffn_g)[name = string("block_14_ffn_g_activation")]; tensor block_14_ffn_x_gated = mul(x = block_14_ffn_inproj, y = block_14_ffn_g_activation)[name = string("block_14_ffn_x_gated")]; tensor block_14_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462130560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465399232))))[name = string("block_14_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_69 = constexpr_blockwise_shift_scale(data = block_14_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465427968))))[name = string("constexpr_blockwise_shift_scale_69")]; tensor block_14_ffn_outproj_strides_0 = const()[name = string("block_14_ffn_outproj_strides_0"), val = tensor([1])]; string block_14_ffn_outproj_pad_type_0 = const()[name = string("block_14_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_14_ffn_outproj_pad_0 = const()[name = string("block_14_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_14_ffn_outproj_dilations_0 = const()[name = string("block_14_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_14_ffn_outproj_groups_0 = const()[name = string("block_14_ffn_outproj_groups_0"), val = int32(1)]; tensor block_14_ffn_outproj = conv(dilations = block_14_ffn_outproj_dilations_0, groups = block_14_ffn_outproj_groups_0, pad = block_14_ffn_outproj_pad_0, pad_type = block_14_ffn_outproj_pad_type_0, strides = block_14_ffn_outproj_strides_0, weight = 
constexpr_blockwise_shift_scale_69, x = block_14_ffn_x_gated)[name = string("block_14_ffn_outproj")]; tensor block_14_residual_2 = add(x = block_14_ffn_outproj, y = block_14_residual_1)[name = string("block_14_residual_2")]; tensor block_15_attention_rmsnorm_abs = abs(x = block_14_residual_2)[name = string("block_15_attention_rmsnorm_abs")]; tensor block_15_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_15_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_15_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_15_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_15_attention_rmsnorm_maxval = reduce_max(axes = block_15_attention_rmsnorm_maxval_axes_0, keep_dims = block_15_attention_rmsnorm_maxval_keep_dims_0, x = block_15_attention_rmsnorm_abs)[name = string("block_15_attention_rmsnorm_maxval")]; fp16 block_15_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_15_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_15_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_15_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_15_attention_rmsnorm_maxval_clipped = clip(alpha = block_15_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_15_attention_rmsnorm_maxval_clipped_beta_0, x = block_15_attention_rmsnorm_maxval)[name = string("block_15_attention_rmsnorm_maxval_clipped")]; tensor block_15_attention_rmsnorm_scaled = real_div(x = block_14_residual_2, y = block_15_attention_rmsnorm_maxval_clipped)[name = string("block_15_attention_rmsnorm_scaled")]; tensor block_15_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_15_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_15_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_15_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_15_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_15_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_15_attention_rmsnorm_squared_sum_keep_dims_0, x = block_15_attention_rmsnorm_scaled)[name = string("block_15_attention_rmsnorm_squared_sum")]; fp16 block_15_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_15_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_15_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_15_attention_rmsnorm_rsqrt_epsilon_0, x = block_15_attention_rmsnorm_squared_sum)[name = string("block_15_attention_rmsnorm_rsqrt")]; fp16 block_15_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_15_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_15_attention_rmsnorm_dim_scaled = mul(x = block_15_attention_rmsnorm_scaled, y = block_15_attention_rmsnorm_dim_scaled_y_0)[name = string("block_15_attention_rmsnorm_dim_scaled")]; tensor block_15_attention_rmsnorm_normalized = mul(x = block_15_attention_rmsnorm_dim_scaled, y = block_15_attention_rmsnorm_rsqrt)[name = string("block_15_attention_rmsnorm_normalized")]; tensor block_15_attention_rmsnorm_y_0 = const()[name = string("block_15_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465429824)))]; tensor block_15_attention_rmsnorm = mul(x = block_15_attention_rmsnorm_normalized, y = block_15_attention_rmsnorm_y_0)[name = string("block_15_attention_rmsnorm")]; tensor attention_15_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(465431680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466205888))))[name = string("attention_15_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_70 = constexpr_blockwise_shift_scale(data = attention_15_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466242816))))[name = string("constexpr_blockwise_shift_scale_70")]; tensor attention_15_qkvproj_bias_0 = const()[name = string("attention_15_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466245184)))]; tensor attention_15_qkvproj_strides_0 = const()[name = string("attention_15_qkvproj_strides_0"), val = tensor([1])]; string attention_15_qkvproj_pad_type_0 = const()[name = string("attention_15_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_15_qkvproj_pad_0 = const()[name = string("attention_15_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_15_qkvproj_dilations_0 = const()[name = string("attention_15_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_15_qkvproj_groups_0 = const()[name = string("attention_15_qkvproj_groups_0"), val = int32(1)]; tensor attention_15_qkvproj = conv(bias = attention_15_qkvproj_bias_0, dilations = attention_15_qkvproj_dilations_0, groups = attention_15_qkvproj_groups_0, pad = attention_15_qkvproj_pad_0, pad_type = attention_15_qkvproj_pad_type_0, strides = attention_15_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_70, x = block_15_attention_rmsnorm)[name = string("attention_15_qkvproj")]; tensor attention_15_head_reshape_shape_0 = const()[name = string("attention_15_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_15_head_reshape = reshape(shape = attention_15_head_reshape_shape_0, x = attention_15_qkvproj)[name = string("attention_15_head_reshape")]; tensor attention_15_head_transpose_perm_0 = const()[name = string("attention_15_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_15_split_qkv_heads_axis_0 = const()[name = string("attention_15_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_15_split_qkv_heads_split_sizes_0 = const()[name = string("attention_15_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_15_head_transpose = transpose(perm = attention_15_head_transpose_perm_0, x = attention_15_head_reshape)[name = string("transpose_18")]; tensor attention_15_split_qkv_heads_0, tensor attention_15_split_qkv_heads_1, tensor attention_15_split_qkv_heads_2 = split(axis = attention_15_split_qkv_heads_axis_0, split_sizes = attention_15_split_qkv_heads_split_sizes_0, x = attention_15_head_transpose)[name = string("attention_15_split_qkv_heads")]; tensor attention_15_q_rope_lhs_mult = mul(x = attention_15_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_15_q_rope_lhs_mult")]; int32 attention_15_q_rotate_half_split_num_splits_0 = const()[name = string("attention_15_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_15_q_rotate_half_split_axis_0 = const()[name = string("attention_15_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_15_q_rotate_half_split_0, tensor attention_15_q_rotate_half_split_1 = split(axis = attention_15_q_rotate_half_split_axis_0, num_splits = attention_15_q_rotate_half_split_num_splits_0, x = attention_15_split_qkv_heads_0)[name = string("attention_15_q_rotate_half_split")]; 
fp16 attention_15_q_rotate_half_neg_y_0 = const()[name = string("attention_15_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_15_q_rotate_half_neg = mul(x = attention_15_q_rotate_half_split_1, y = attention_15_q_rotate_half_neg_y_0)[name = string("attention_15_q_rotate_half_neg")]; int32 attention_15_q_rotate_half_concat_axis_0 = const()[name = string("attention_15_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_15_q_rotate_half_concat_interleave_0 = const()[name = string("attention_15_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_15_q_rotate_half_concat = concat(axis = attention_15_q_rotate_half_concat_axis_0, interleave = attention_15_q_rotate_half_concat_interleave_0, values = (attention_15_q_rotate_half_neg, attention_15_q_rotate_half_split_0))[name = string("attention_15_q_rotate_half_concat")]; tensor attention_15_q_rope_rhs_mult = mul(x = attention_15_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_15_q_rope_rhs_mult")]; tensor attention_15_q_rope = add(x = attention_15_q_rope_lhs_mult, y = attention_15_q_rope_rhs_mult)[name = string("attention_15_q_rope")]; tensor attention_15_k_rope_lhs_mult = mul(x = attention_15_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_15_k_rope_lhs_mult")]; int32 attention_15_k_rotate_half_split_num_splits_0 = const()[name = string("attention_15_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_15_k_rotate_half_split_axis_0 = const()[name = string("attention_15_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_15_k_rotate_half_split_0, tensor attention_15_k_rotate_half_split_1 = split(axis = attention_15_k_rotate_half_split_axis_0, num_splits = attention_15_k_rotate_half_split_num_splits_0, x = attention_15_split_qkv_heads_1)[name = string("attention_15_k_rotate_half_split")]; fp16 attention_15_k_rotate_half_neg_y_0 = const()[name = string("attention_15_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_15_k_rotate_half_neg = mul(x = attention_15_k_rotate_half_split_1, y = attention_15_k_rotate_half_neg_y_0)[name = string("attention_15_k_rotate_half_neg")]; int32 attention_15_k_rotate_half_concat_axis_0 = const()[name = string("attention_15_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_15_k_rotate_half_concat_interleave_0 = const()[name = string("attention_15_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_15_k_rotate_half_concat = concat(axis = attention_15_k_rotate_half_concat_axis_0, interleave = attention_15_k_rotate_half_concat_interleave_0, values = (attention_15_k_rotate_half_neg, attention_15_k_rotate_half_split_0))[name = string("attention_15_k_rotate_half_concat")]; tensor attention_15_k_rope_rhs_mult = mul(x = attention_15_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_15_k_rope_rhs_mult")]; tensor attention_15_k_rope = add(x = attention_15_k_rope_lhs_mult, y = attention_15_k_rope_rhs_mult)[name = string("attention_15_k_rope")]; int32 attention_15_q_splits_axis_0 = const()[name = string("attention_15_q_splits_axis_0"), val = int32(1)]; int32 attention_15_q_splits_num_splits_0 = const()[name = string("attention_15_q_splits_num_splits_0"), val = int32(2)]; tensor attention_15_q_splits_0, tensor attention_15_q_splits_1 = split(axis = attention_15_q_splits_axis_0, num_splits = attention_15_q_splits_num_splits_0, x = attention_15_q_rope)[name = string("attention_15_q_splits")]; tensor attention_15_update_begin_0_values0_0 = const()[name = 
string("attention_15_update_begin_0_values0_0"), val = tensor([15])]; tensor attention_15_update_begin_0_values1_0 = const()[name = string("attention_15_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_15_update_begin_0_values3_0 = const()[name = string("attention_15_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_15_update_begin_0_axis_0 = const()[name = string("attention_15_update_begin_0_axis_0"), val = int32(0)]; bool attention_15_update_begin_0_interleave_0 = const()[name = string("attention_15_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_15_update_begin_0 = concat(axis = attention_15_update_begin_0_axis_0, interleave = attention_15_update_begin_0_interleave_0, values = (attention_15_update_begin_0_values0_0, attention_15_update_begin_0_values1_0, query_pos1, attention_15_update_begin_0_values3_0))[name = string("attention_15_update_begin_0")]; tensor attention_15_update_end_0_values0_0 = const()[name = string("attention_15_update_end_0_values0_0"), val = tensor([16])]; tensor attention_15_update_end_0_values1_0 = const()[name = string("attention_15_update_end_0_values1_0"), val = tensor([2])]; tensor attention_15_update_end_0_values3_0 = const()[name = string("attention_15_update_end_0_values3_0"), val = tensor([64])]; int32 attention_15_update_end_0_axis_0 = const()[name = string("attention_15_update_end_0_axis_0"), val = int32(0)]; bool attention_15_update_end_0_interleave_0 = const()[name = string("attention_15_update_end_0_interleave_0"), val = bool(false)]; tensor attention_15_update_end_0 = concat(axis = attention_15_update_end_0_axis_0, interleave = attention_15_update_end_0_interleave_0, values = (attention_15_update_end_0_values0_0, attention_15_update_end_0_values1_0, end_pos_0, attention_15_update_end_0_values3_0))[name = string("attention_15_update_end_0")]; tensor attention_15_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_15_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_15_updated_key_cache_0 = slice_update(begin = attention_15_update_begin_0, end = attention_15_update_end_0, squeeze_mask = attention_15_updated_key_cache_0_squeeze_mask_0, update = attention_15_k_rope, x = coreml_update_state_28)[name = string("attention_15_updated_key_cache_0")]; write_state(data = attention_15_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_30_write_state")]; tensor coreml_update_state_30 = read_state(input = key_cache_state)[name = string("coreml_update_state_30")]; tensor attention_15_key_cache_begin_0 = const()[name = string("attention_15_key_cache_begin_0"), val = tensor([15, 0, 0, 0])]; tensor attention_15_key_cache_end_0 = const()[name = string("attention_15_key_cache_end_0"), val = tensor([16, 2, 512, 64])]; tensor attention_15_key_cache_squeeze_mask_0 = const()[name = string("attention_15_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_15_key_cache = slice_by_index(begin = attention_15_key_cache_begin_0, end = attention_15_key_cache_end_0, squeeze_mask = attention_15_key_cache_squeeze_mask_0, x = coreml_update_state_30)[name = string("attention_15_key_cache")]; int32 attention_15_key_cache_head_axis_0 = const()[name = string("attention_15_key_cache_head_axis_0"), val = int32(1)]; int32 attention_15_key_cache_head_num_splits_0 = const()[name = string("attention_15_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_15_key_cache_head_0, tensor 
attention_15_key_cache_head_1 = split(axis = attention_15_key_cache_head_axis_0, num_splits = attention_15_key_cache_head_num_splits_0, x = attention_15_key_cache)[name = string("attention_15_key_cache_head")]; tensor attention_15_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_15_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_15_updated_value_cache_0 = slice_update(begin = attention_15_update_begin_0, end = attention_15_update_end_0, squeeze_mask = attention_15_updated_value_cache_0_squeeze_mask_0, update = attention_15_split_qkv_heads_2, x = coreml_update_state_29)[name = string("attention_15_updated_value_cache_0")]; write_state(data = attention_15_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_31_write_state")]; tensor coreml_update_state_31 = read_state(input = value_cache_state)[name = string("coreml_update_state_31")]; tensor attention_15_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_15_slice_current_layer_value_cache_begin_0"), val = tensor([15, 0, 0, 0])]; tensor attention_15_slice_current_layer_value_cache_end_0 = const()[name = string("attention_15_slice_current_layer_value_cache_end_0"), val = tensor([16, 2, 512, 64])]; tensor attention_15_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_15_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_15_slice_current_layer_value_cache = slice_by_index(begin = attention_15_slice_current_layer_value_cache_begin_0, end = attention_15_slice_current_layer_value_cache_end_0, squeeze_mask = attention_15_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_31)[name = string("attention_15_slice_current_layer_value_cache")]; int32 attention_15_slice_value_cache_heads_axis_0 = const()[name = string("attention_15_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_15_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_15_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_15_slice_value_cache_heads_0, tensor attention_15_slice_value_cache_heads_1 = split(axis = attention_15_slice_value_cache_heads_axis_0, num_splits = attention_15_slice_value_cache_heads_num_splits_0, x = attention_15_slice_current_layer_value_cache)[name = string("attention_15_slice_value_cache_heads")]; bool attention_15_scores_0_transpose_y_0 = const()[name = string("attention_15_scores_0_transpose_y_0"), val = bool(true)]; bool attention_15_scores_0_transpose_x_0 = const()[name = string("attention_15_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_15_scores_0 = matmul(transpose_x = attention_15_scores_0_transpose_x_0, transpose_y = attention_15_scores_0_transpose_y_0, x = attention_15_key_cache_head_0, y = attention_15_q_splits_0)[name = string("attention_15_scores_0")]; fp16 attention_15_scaled_scores_0_y_0 = const()[name = string("attention_15_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_15_scaled_scores_0 = mul(x = attention_15_scores_0, y = attention_15_scaled_scores_0_y_0)[name = string("attention_15_scaled_scores_0")]; tensor attention_15_masked_scaled_scores_0 = add(x = attention_15_scaled_scores_0, y = transpose_0)[name = string("attention_15_masked_scaled_scores_0")]; int32 softmax_30_axis_0 = const()[name = string("softmax_30_axis_0"), val = int32(-2)]; tensor softmax_30 = softmax(axis = softmax_30_axis_0, x = 
attention_15_masked_scaled_scores_0)[name = string("softmax_30")]; bool attention_15_attention_0_transpose_x_0 = const()[name = string("attention_15_attention_0_transpose_x_0"), val = bool(true)]; bool attention_15_attention_0_transpose_y_0 = const()[name = string("attention_15_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_15_attention_0 = matmul(transpose_x = attention_15_attention_0_transpose_x_0, transpose_y = attention_15_attention_0_transpose_y_0, x = softmax_30, y = attention_15_slice_value_cache_heads_0)[name = string("attention_15_attention_0")]; bool attention_15_scores_1_transpose_y_0 = const()[name = string("attention_15_scores_1_transpose_y_0"), val = bool(true)]; bool attention_15_scores_1_transpose_x_0 = const()[name = string("attention_15_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_15_scores_1 = matmul(transpose_x = attention_15_scores_1_transpose_x_0, transpose_y = attention_15_scores_1_transpose_y_0, x = attention_15_key_cache_head_1, y = attention_15_q_splits_1)[name = string("attention_15_scores_1")]; fp16 attention_15_scaled_scores_1_y_0 = const()[name = string("attention_15_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_15_scaled_scores_1 = mul(x = attention_15_scores_1, y = attention_15_scaled_scores_1_y_0)[name = string("attention_15_scaled_scores_1")]; tensor attention_15_masked_scaled_scores_1 = add(x = attention_15_scaled_scores_1, y = transpose_0)[name = string("attention_15_masked_scaled_scores_1")]; int32 softmax_31_axis_0 = const()[name = string("softmax_31_axis_0"), val = int32(-2)]; tensor softmax_31 = softmax(axis = softmax_31_axis_0, x = attention_15_masked_scaled_scores_1)[name = string("softmax_31")]; bool attention_15_attention_1_transpose_x_0 = const()[name = string("attention_15_attention_1_transpose_x_0"), val = bool(true)]; bool attention_15_attention_1_transpose_y_0 = const()[name = string("attention_15_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_15_attention_1 = matmul(transpose_x = attention_15_attention_1_transpose_x_0, transpose_y = attention_15_attention_1_transpose_y_0, x = softmax_31, y = attention_15_slice_value_cache_heads_1)[name = string("attention_15_attention_1")]; int32 attention_15_concat_attention_all_heads_axis_0 = const()[name = string("attention_15_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_15_concat_attention_all_heads_interleave_0 = const()[name = string("attention_15_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_15_concat_attention_all_heads = concat(axis = attention_15_concat_attention_all_heads_axis_0, interleave = attention_15_concat_attention_all_heads_interleave_0, values = (attention_15_attention_0, attention_15_attention_1))[name = string("attention_15_concat_attention_all_heads")]; tensor attention_15_channels_first_retransposed_perm_0 = const()[name = string("attention_15_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_15_reshaped_shape_0 = const()[name = string("attention_15_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_15_channels_first_retransposed = transpose(perm = attention_15_channels_first_retransposed_perm_0, x = attention_15_concat_attention_all_heads)[name = string("transpose_17")]; tensor attention_15_reshaped = reshape(shape = attention_15_reshaped_shape_0, x = attention_15_channels_first_retransposed)[name = string("attention_15_reshaped")]; tensor attention_15_outproj_weight_dequantization = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466247552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466849728))))[name = string("attention_15_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_71 = constexpr_blockwise_shift_scale(data = attention_15_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466878464))))[name = string("constexpr_blockwise_shift_scale_71")]; tensor attention_15_outproj_strides_0 = const()[name = string("attention_15_outproj_strides_0"), val = tensor([1])]; string attention_15_outproj_pad_type_0 = const()[name = string("attention_15_outproj_pad_type_0"), val = string("valid")]; tensor attention_15_outproj_pad_0 = const()[name = string("attention_15_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_15_outproj_dilations_0 = const()[name = string("attention_15_outproj_dilations_0"), val = tensor([1])]; int32 attention_15_outproj_groups_0 = const()[name = string("attention_15_outproj_groups_0"), val = int32(1)]; tensor attention_15_outproj = conv(dilations = attention_15_outproj_dilations_0, groups = attention_15_outproj_groups_0, pad = attention_15_outproj_pad_0, pad_type = attention_15_outproj_pad_type_0, strides = attention_15_outproj_strides_0, weight = constexpr_blockwise_shift_scale_71, x = attention_15_reshaped)[name = string("attention_15_outproj")]; tensor block_15_residual_1 = add(x = block_14_residual_2, y = attention_15_outproj)[name = string("block_15_residual_1")]; tensor block_15_ffn_rmsnorm_abs = abs(x = block_15_residual_1)[name = string("block_15_ffn_rmsnorm_abs")]; tensor block_15_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_15_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_15_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_15_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_15_ffn_rmsnorm_maxval = reduce_max(axes = block_15_ffn_rmsnorm_maxval_axes_0, keep_dims = block_15_ffn_rmsnorm_maxval_keep_dims_0, x = block_15_ffn_rmsnorm_abs)[name = string("block_15_ffn_rmsnorm_maxval")]; fp16 block_15_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_15_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_15_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_15_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_15_ffn_rmsnorm_maxval_clipped = clip(alpha = block_15_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_15_ffn_rmsnorm_maxval_clipped_beta_0, x = block_15_ffn_rmsnorm_maxval)[name = string("block_15_ffn_rmsnorm_maxval_clipped")]; tensor block_15_ffn_rmsnorm_scaled = real_div(x = block_15_residual_1, y = block_15_ffn_rmsnorm_maxval_clipped)[name = string("block_15_ffn_rmsnorm_scaled")]; tensor block_15_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_15_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_15_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_15_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_15_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_15_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_15_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_15_ffn_rmsnorm_scaled)[name = string("block_15_ffn_rmsnorm_squared_sum")]; fp16 block_15_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_15_ffn_rmsnorm_rsqrt_epsilon_0"), val = 
fp16(0x1p-14)]; tensor block_15_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_15_ffn_rmsnorm_rsqrt_epsilon_0, x = block_15_ffn_rmsnorm_squared_sum)[name = string("block_15_ffn_rmsnorm_rsqrt")]; fp16 block_15_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_15_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_15_ffn_rmsnorm_dim_scaled = mul(x = block_15_ffn_rmsnorm_scaled, y = block_15_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_15_ffn_rmsnorm_dim_scaled")]; tensor block_15_ffn_rmsnorm_normalized = mul(x = block_15_ffn_rmsnorm_dim_scaled, y = block_15_ffn_rmsnorm_rsqrt)[name = string("block_15_ffn_rmsnorm_normalized")]; tensor block_15_ffn_rmsnorm_y_0 = const()[name = string("block_15_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466880320)))]; tensor block_15_ffn_rmsnorm = mul(x = block_15_ffn_rmsnorm_normalized, y = block_15_ffn_rmsnorm_y_0)[name = string("block_15_ffn_rmsnorm")]; tensor block_15_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466882176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470150848))))[name = string("block_15_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_72 = constexpr_blockwise_shift_scale(data = block_15_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470306560))))[name = string("constexpr_blockwise_shift_scale_72")]; tensor block_15_ffn_inproj_strides_0 = const()[name = string("block_15_ffn_inproj_strides_0"), val = tensor([1])]; string block_15_ffn_inproj_pad_type_0 = const()[name = string("block_15_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_15_ffn_inproj_pad_0 = const()[name = string("block_15_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_15_ffn_inproj_dilations_0 = const()[name = string("block_15_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_15_ffn_inproj_groups_0 = const()[name = string("block_15_ffn_inproj_groups_0"), val = int32(1)]; tensor block_15_ffn_inproj = conv(dilations = block_15_ffn_inproj_dilations_0, groups = block_15_ffn_inproj_groups_0, pad = block_15_ffn_inproj_pad_0, pad_type = block_15_ffn_inproj_pad_type_0, strides = block_15_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_72, x = block_15_ffn_rmsnorm)[name = string("block_15_ffn_inproj")]; tensor block_15_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470316352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473585024))))[name = string("block_15_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_73 = constexpr_blockwise_shift_scale(data = block_15_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473740736))))[name = string("constexpr_blockwise_shift_scale_73")]; tensor block_15_ffn_g_strides_0 = const()[name = string("block_15_ffn_g_strides_0"), val = tensor([1])]; string block_15_ffn_g_pad_type_0 = const()[name = string("block_15_ffn_g_pad_type_0"), val = string("valid")]; tensor block_15_ffn_g_pad_0 = const()[name = string("block_15_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_15_ffn_g_dilations_0 = const()[name = string("block_15_ffn_g_dilations_0"), val = 
tensor([1])]; int32 block_15_ffn_g_groups_0 = const()[name = string("block_15_ffn_g_groups_0"), val = int32(1)]; tensor block_15_ffn_g = conv(dilations = block_15_ffn_g_dilations_0, groups = block_15_ffn_g_groups_0, pad = block_15_ffn_g_pad_0, pad_type = block_15_ffn_g_pad_type_0, strides = block_15_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_73, x = block_15_ffn_rmsnorm)[name = string("block_15_ffn_g")]; tensor block_15_ffn_g_activation = silu(x = block_15_ffn_g)[name = string("block_15_ffn_g_activation")]; tensor block_15_ffn_x_gated = mul(x = block_15_ffn_inproj, y = block_15_ffn_g_activation)[name = string("block_15_ffn_x_gated")]; tensor block_15_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473750528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477019200))))[name = string("block_15_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_74 = constexpr_blockwise_shift_scale(data = block_15_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477047936))))[name = string("constexpr_blockwise_shift_scale_74")]; tensor block_15_ffn_outproj_strides_0 = const()[name = string("block_15_ffn_outproj_strides_0"), val = tensor([1])]; string block_15_ffn_outproj_pad_type_0 = const()[name = string("block_15_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_15_ffn_outproj_pad_0 = const()[name = string("block_15_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_15_ffn_outproj_dilations_0 = const()[name = string("block_15_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_15_ffn_outproj_groups_0 = const()[name = string("block_15_ffn_outproj_groups_0"), val = int32(1)]; tensor block_15_ffn_outproj = conv(dilations = block_15_ffn_outproj_dilations_0, groups = block_15_ffn_outproj_groups_0, pad = block_15_ffn_outproj_pad_0, pad_type = block_15_ffn_outproj_pad_type_0, strides = block_15_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_74, x = block_15_ffn_x_gated)[name = string("block_15_ffn_outproj")]; tensor block_15_residual_2 = add(x = block_15_ffn_outproj, y = block_15_residual_1)[name = string("block_15_residual_2")]; tensor block_16_attention_rmsnorm_abs = abs(x = block_15_residual_2)[name = string("block_16_attention_rmsnorm_abs")]; tensor block_16_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_16_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_16_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_16_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_16_attention_rmsnorm_maxval = reduce_max(axes = block_16_attention_rmsnorm_maxval_axes_0, keep_dims = block_16_attention_rmsnorm_maxval_keep_dims_0, x = block_16_attention_rmsnorm_abs)[name = string("block_16_attention_rmsnorm_maxval")]; fp16 block_16_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_16_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_16_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_16_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_16_attention_rmsnorm_maxval_clipped = clip(alpha = block_16_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_16_attention_rmsnorm_maxval_clipped_beta_0, x = block_16_attention_rmsnorm_maxval)[name = 
string("block_16_attention_rmsnorm_maxval_clipped")]; tensor block_16_attention_rmsnorm_scaled = real_div(x = block_15_residual_2, y = block_16_attention_rmsnorm_maxval_clipped)[name = string("block_16_attention_rmsnorm_scaled")]; tensor block_16_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_16_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_16_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_16_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_16_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_16_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_16_attention_rmsnorm_squared_sum_keep_dims_0, x = block_16_attention_rmsnorm_scaled)[name = string("block_16_attention_rmsnorm_squared_sum")]; fp16 block_16_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_16_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_16_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_16_attention_rmsnorm_rsqrt_epsilon_0, x = block_16_attention_rmsnorm_squared_sum)[name = string("block_16_attention_rmsnorm_rsqrt")]; fp16 block_16_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_16_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_16_attention_rmsnorm_dim_scaled = mul(x = block_16_attention_rmsnorm_scaled, y = block_16_attention_rmsnorm_dim_scaled_y_0)[name = string("block_16_attention_rmsnorm_dim_scaled")]; tensor block_16_attention_rmsnorm_normalized = mul(x = block_16_attention_rmsnorm_dim_scaled, y = block_16_attention_rmsnorm_rsqrt)[name = string("block_16_attention_rmsnorm_normalized")]; tensor block_16_attention_rmsnorm_y_0 = const()[name = string("block_16_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477049792)))]; tensor block_16_attention_rmsnorm = mul(x = block_16_attention_rmsnorm_normalized, y = block_16_attention_rmsnorm_y_0)[name = string("block_16_attention_rmsnorm")]; tensor attention_16_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477051648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477825856))))[name = string("attention_16_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_75 = constexpr_blockwise_shift_scale(data = attention_16_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477862784))))[name = string("constexpr_blockwise_shift_scale_75")]; tensor attention_16_qkvproj_bias_0 = const()[name = string("attention_16_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477865152)))]; tensor attention_16_qkvproj_strides_0 = const()[name = string("attention_16_qkvproj_strides_0"), val = tensor([1])]; string attention_16_qkvproj_pad_type_0 = const()[name = string("attention_16_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_16_qkvproj_pad_0 = const()[name = string("attention_16_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_16_qkvproj_dilations_0 = const()[name = string("attention_16_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_16_qkvproj_groups_0 = const()[name = string("attention_16_qkvproj_groups_0"), val = int32(1)]; tensor attention_16_qkvproj = conv(bias = attention_16_qkvproj_bias_0, dilations = 
attention_16_qkvproj_dilations_0, groups = attention_16_qkvproj_groups_0, pad = attention_16_qkvproj_pad_0, pad_type = attention_16_qkvproj_pad_type_0, strides = attention_16_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_75, x = block_16_attention_rmsnorm)[name = string("attention_16_qkvproj")]; tensor attention_16_head_reshape_shape_0 = const()[name = string("attention_16_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_16_head_reshape = reshape(shape = attention_16_head_reshape_shape_0, x = attention_16_qkvproj)[name = string("attention_16_head_reshape")]; tensor attention_16_head_transpose_perm_0 = const()[name = string("attention_16_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_16_split_qkv_heads_axis_0 = const()[name = string("attention_16_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_16_split_qkv_heads_split_sizes_0 = const()[name = string("attention_16_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_16_head_transpose = transpose(perm = attention_16_head_transpose_perm_0, x = attention_16_head_reshape)[name = string("transpose_16")]; tensor attention_16_split_qkv_heads_0, tensor attention_16_split_qkv_heads_1, tensor attention_16_split_qkv_heads_2 = split(axis = attention_16_split_qkv_heads_axis_0, split_sizes = attention_16_split_qkv_heads_split_sizes_0, x = attention_16_head_transpose)[name = string("attention_16_split_qkv_heads")]; tensor attention_16_q_rope_lhs_mult = mul(x = attention_16_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_16_q_rope_lhs_mult")]; int32 attention_16_q_rotate_half_split_num_splits_0 = const()[name = string("attention_16_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_16_q_rotate_half_split_axis_0 = const()[name = string("attention_16_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_16_q_rotate_half_split_0, tensor attention_16_q_rotate_half_split_1 = split(axis = attention_16_q_rotate_half_split_axis_0, num_splits = attention_16_q_rotate_half_split_num_splits_0, x = attention_16_split_qkv_heads_0)[name = string("attention_16_q_rotate_half_split")]; fp16 attention_16_q_rotate_half_neg_y_0 = const()[name = string("attention_16_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_16_q_rotate_half_neg = mul(x = attention_16_q_rotate_half_split_1, y = attention_16_q_rotate_half_neg_y_0)[name = string("attention_16_q_rotate_half_neg")]; int32 attention_16_q_rotate_half_concat_axis_0 = const()[name = string("attention_16_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_16_q_rotate_half_concat_interleave_0 = const()[name = string("attention_16_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_16_q_rotate_half_concat = concat(axis = attention_16_q_rotate_half_concat_axis_0, interleave = attention_16_q_rotate_half_concat_interleave_0, values = (attention_16_q_rotate_half_neg, attention_16_q_rotate_half_split_0))[name = string("attention_16_q_rotate_half_concat")]; tensor attention_16_q_rope_rhs_mult = mul(x = attention_16_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_16_q_rope_rhs_mult")]; tensor attention_16_q_rope = add(x = attention_16_q_rope_lhs_mult, y = attention_16_q_rope_rhs_mult)[name = string("attention_16_q_rope")]; tensor attention_16_k_rope_lhs_mult = mul(x = attention_16_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_16_k_rope_lhs_mult")]; int32 attention_16_k_rotate_half_split_num_splits_0 = const()[name = 
string("attention_16_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_16_k_rotate_half_split_axis_0 = const()[name = string("attention_16_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_16_k_rotate_half_split_0, tensor attention_16_k_rotate_half_split_1 = split(axis = attention_16_k_rotate_half_split_axis_0, num_splits = attention_16_k_rotate_half_split_num_splits_0, x = attention_16_split_qkv_heads_1)[name = string("attention_16_k_rotate_half_split")]; fp16 attention_16_k_rotate_half_neg_y_0 = const()[name = string("attention_16_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_16_k_rotate_half_neg = mul(x = attention_16_k_rotate_half_split_1, y = attention_16_k_rotate_half_neg_y_0)[name = string("attention_16_k_rotate_half_neg")]; int32 attention_16_k_rotate_half_concat_axis_0 = const()[name = string("attention_16_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_16_k_rotate_half_concat_interleave_0 = const()[name = string("attention_16_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_16_k_rotate_half_concat = concat(axis = attention_16_k_rotate_half_concat_axis_0, interleave = attention_16_k_rotate_half_concat_interleave_0, values = (attention_16_k_rotate_half_neg, attention_16_k_rotate_half_split_0))[name = string("attention_16_k_rotate_half_concat")]; tensor attention_16_k_rope_rhs_mult = mul(x = attention_16_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_16_k_rope_rhs_mult")]; tensor attention_16_k_rope = add(x = attention_16_k_rope_lhs_mult, y = attention_16_k_rope_rhs_mult)[name = string("attention_16_k_rope")]; int32 attention_16_q_splits_axis_0 = const()[name = string("attention_16_q_splits_axis_0"), val = int32(1)]; int32 attention_16_q_splits_num_splits_0 = const()[name = string("attention_16_q_splits_num_splits_0"), val = int32(2)]; tensor attention_16_q_splits_0, tensor attention_16_q_splits_1 = split(axis = attention_16_q_splits_axis_0, num_splits = attention_16_q_splits_num_splits_0, x = attention_16_q_rope)[name = string("attention_16_q_splits")]; tensor attention_16_update_begin_0_values0_0 = const()[name = string("attention_16_update_begin_0_values0_0"), val = tensor([16])]; tensor attention_16_update_begin_0_values1_0 = const()[name = string("attention_16_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_16_update_begin_0_values3_0 = const()[name = string("attention_16_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_16_update_begin_0_axis_0 = const()[name = string("attention_16_update_begin_0_axis_0"), val = int32(0)]; bool attention_16_update_begin_0_interleave_0 = const()[name = string("attention_16_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_16_update_begin_0 = concat(axis = attention_16_update_begin_0_axis_0, interleave = attention_16_update_begin_0_interleave_0, values = (attention_16_update_begin_0_values0_0, attention_16_update_begin_0_values1_0, query_pos1, attention_16_update_begin_0_values3_0))[name = string("attention_16_update_begin_0")]; tensor attention_16_update_end_0_values0_0 = const()[name = string("attention_16_update_end_0_values0_0"), val = tensor([17])]; tensor attention_16_update_end_0_values1_0 = const()[name = string("attention_16_update_end_0_values1_0"), val = tensor([2])]; tensor attention_16_update_end_0_values3_0 = const()[name = string("attention_16_update_end_0_values3_0"), val = tensor([64])]; int32 attention_16_update_end_0_axis_0 = const()[name = 
string("attention_16_update_end_0_axis_0"), val = int32(0)]; bool attention_16_update_end_0_interleave_0 = const()[name = string("attention_16_update_end_0_interleave_0"), val = bool(false)]; tensor attention_16_update_end_0 = concat(axis = attention_16_update_end_0_axis_0, interleave = attention_16_update_end_0_interleave_0, values = (attention_16_update_end_0_values0_0, attention_16_update_end_0_values1_0, end_pos_0, attention_16_update_end_0_values3_0))[name = string("attention_16_update_end_0")]; tensor attention_16_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_16_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_16_updated_key_cache_0 = slice_update(begin = attention_16_update_begin_0, end = attention_16_update_end_0, squeeze_mask = attention_16_updated_key_cache_0_squeeze_mask_0, update = attention_16_k_rope, x = coreml_update_state_30)[name = string("attention_16_updated_key_cache_0")]; write_state(data = attention_16_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_32_write_state")]; tensor coreml_update_state_32 = read_state(input = key_cache_state)[name = string("coreml_update_state_32")]; tensor attention_16_key_cache_begin_0 = const()[name = string("attention_16_key_cache_begin_0"), val = tensor([16, 0, 0, 0])]; tensor attention_16_key_cache_end_0 = const()[name = string("attention_16_key_cache_end_0"), val = tensor([17, 2, 512, 64])]; tensor attention_16_key_cache_squeeze_mask_0 = const()[name = string("attention_16_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_16_key_cache = slice_by_index(begin = attention_16_key_cache_begin_0, end = attention_16_key_cache_end_0, squeeze_mask = attention_16_key_cache_squeeze_mask_0, x = coreml_update_state_32)[name = string("attention_16_key_cache")]; int32 attention_16_key_cache_head_axis_0 = const()[name = string("attention_16_key_cache_head_axis_0"), val = int32(1)]; int32 attention_16_key_cache_head_num_splits_0 = const()[name = string("attention_16_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_16_key_cache_head_0, tensor attention_16_key_cache_head_1 = split(axis = attention_16_key_cache_head_axis_0, num_splits = attention_16_key_cache_head_num_splits_0, x = attention_16_key_cache)[name = string("attention_16_key_cache_head")]; tensor attention_16_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_16_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_16_updated_value_cache_0 = slice_update(begin = attention_16_update_begin_0, end = attention_16_update_end_0, squeeze_mask = attention_16_updated_value_cache_0_squeeze_mask_0, update = attention_16_split_qkv_heads_2, x = coreml_update_state_31)[name = string("attention_16_updated_value_cache_0")]; write_state(data = attention_16_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_33_write_state")]; tensor coreml_update_state_33 = read_state(input = value_cache_state)[name = string("coreml_update_state_33")]; tensor attention_16_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_16_slice_current_layer_value_cache_begin_0"), val = tensor([16, 0, 0, 0])]; tensor attention_16_slice_current_layer_value_cache_end_0 = const()[name = string("attention_16_slice_current_layer_value_cache_end_0"), val = tensor([17, 2, 512, 64])]; tensor attention_16_slice_current_layer_value_cache_squeeze_mask_0 = 
const()[name = string("attention_16_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_16_slice_current_layer_value_cache = slice_by_index(begin = attention_16_slice_current_layer_value_cache_begin_0, end = attention_16_slice_current_layer_value_cache_end_0, squeeze_mask = attention_16_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_33)[name = string("attention_16_slice_current_layer_value_cache")]; int32 attention_16_slice_value_cache_heads_axis_0 = const()[name = string("attention_16_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_16_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_16_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_16_slice_value_cache_heads_0, tensor attention_16_slice_value_cache_heads_1 = split(axis = attention_16_slice_value_cache_heads_axis_0, num_splits = attention_16_slice_value_cache_heads_num_splits_0, x = attention_16_slice_current_layer_value_cache)[name = string("attention_16_slice_value_cache_heads")]; bool attention_16_scores_0_transpose_y_0 = const()[name = string("attention_16_scores_0_transpose_y_0"), val = bool(true)]; bool attention_16_scores_0_transpose_x_0 = const()[name = string("attention_16_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_16_scores_0 = matmul(transpose_x = attention_16_scores_0_transpose_x_0, transpose_y = attention_16_scores_0_transpose_y_0, x = attention_16_key_cache_head_0, y = attention_16_q_splits_0)[name = string("attention_16_scores_0")]; fp16 attention_16_scaled_scores_0_y_0 = const()[name = string("attention_16_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_16_scaled_scores_0 = mul(x = attention_16_scores_0, y = attention_16_scaled_scores_0_y_0)[name = string("attention_16_scaled_scores_0")]; tensor attention_16_masked_scaled_scores_0 = add(x = attention_16_scaled_scores_0, y = transpose_0)[name = string("attention_16_masked_scaled_scores_0")]; int32 softmax_32_axis_0 = const()[name = string("softmax_32_axis_0"), val = int32(-2)]; tensor softmax_32 = softmax(axis = softmax_32_axis_0, x = attention_16_masked_scaled_scores_0)[name = string("softmax_32")]; bool attention_16_attention_0_transpose_x_0 = const()[name = string("attention_16_attention_0_transpose_x_0"), val = bool(true)]; bool attention_16_attention_0_transpose_y_0 = const()[name = string("attention_16_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_16_attention_0 = matmul(transpose_x = attention_16_attention_0_transpose_x_0, transpose_y = attention_16_attention_0_transpose_y_0, x = softmax_32, y = attention_16_slice_value_cache_heads_0)[name = string("attention_16_attention_0")]; bool attention_16_scores_1_transpose_y_0 = const()[name = string("attention_16_scores_1_transpose_y_0"), val = bool(true)]; bool attention_16_scores_1_transpose_x_0 = const()[name = string("attention_16_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_16_scores_1 = matmul(transpose_x = attention_16_scores_1_transpose_x_0, transpose_y = attention_16_scores_1_transpose_y_0, x = attention_16_key_cache_head_1, y = attention_16_q_splits_1)[name = string("attention_16_scores_1")]; fp16 attention_16_scaled_scores_1_y_0 = const()[name = string("attention_16_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_16_scaled_scores_1 = mul(x = attention_16_scores_1, y = attention_16_scaled_scores_1_y_0)[name = string("attention_16_scaled_scores_1")]; tensor 
attention_16_masked_scaled_scores_1 = add(x = attention_16_scaled_scores_1, y = transpose_0)[name = string("attention_16_masked_scaled_scores_1")]; int32 softmax_33_axis_0 = const()[name = string("softmax_33_axis_0"), val = int32(-2)]; tensor softmax_33 = softmax(axis = softmax_33_axis_0, x = attention_16_masked_scaled_scores_1)[name = string("softmax_33")]; bool attention_16_attention_1_transpose_x_0 = const()[name = string("attention_16_attention_1_transpose_x_0"), val = bool(true)]; bool attention_16_attention_1_transpose_y_0 = const()[name = string("attention_16_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_16_attention_1 = matmul(transpose_x = attention_16_attention_1_transpose_x_0, transpose_y = attention_16_attention_1_transpose_y_0, x = softmax_33, y = attention_16_slice_value_cache_heads_1)[name = string("attention_16_attention_1")]; int32 attention_16_concat_attention_all_heads_axis_0 = const()[name = string("attention_16_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_16_concat_attention_all_heads_interleave_0 = const()[name = string("attention_16_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_16_concat_attention_all_heads = concat(axis = attention_16_concat_attention_all_heads_axis_0, interleave = attention_16_concat_attention_all_heads_interleave_0, values = (attention_16_attention_0, attention_16_attention_1))[name = string("attention_16_concat_attention_all_heads")]; tensor attention_16_channels_first_retransposed_perm_0 = const()[name = string("attention_16_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_16_reshaped_shape_0 = const()[name = string("attention_16_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_16_channels_first_retransposed = transpose(perm = attention_16_channels_first_retransposed_perm_0, x = attention_16_concat_attention_all_heads)[name = string("transpose_15")]; tensor attention_16_reshaped = reshape(shape = attention_16_reshaped_shape_0, x = attention_16_channels_first_retransposed)[name = string("attention_16_reshaped")]; tensor attention_16_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477867520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478469696))))[name = string("attention_16_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_76 = constexpr_blockwise_shift_scale(data = attention_16_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478498432))))[name = string("constexpr_blockwise_shift_scale_76")]; tensor attention_16_outproj_strides_0 = const()[name = string("attention_16_outproj_strides_0"), val = tensor([1])]; string attention_16_outproj_pad_type_0 = const()[name = string("attention_16_outproj_pad_type_0"), val = string("valid")]; tensor attention_16_outproj_pad_0 = const()[name = string("attention_16_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_16_outproj_dilations_0 = const()[name = string("attention_16_outproj_dilations_0"), val = tensor([1])]; int32 attention_16_outproj_groups_0 = const()[name = string("attention_16_outproj_groups_0"), val = int32(1)]; tensor attention_16_outproj = conv(dilations = attention_16_outproj_dilations_0, groups = attention_16_outproj_groups_0, pad = attention_16_outproj_pad_0, pad_type = attention_16_outproj_pad_type_0, strides = 
attention_16_outproj_strides_0, weight = constexpr_blockwise_shift_scale_76, x = attention_16_reshaped)[name = string("attention_16_outproj")]; tensor block_16_residual_1 = add(x = block_15_residual_2, y = attention_16_outproj)[name = string("block_16_residual_1")]; tensor block_16_ffn_rmsnorm_abs = abs(x = block_16_residual_1)[name = string("block_16_ffn_rmsnorm_abs")]; tensor block_16_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_16_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_16_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_16_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_16_ffn_rmsnorm_maxval = reduce_max(axes = block_16_ffn_rmsnorm_maxval_axes_0, keep_dims = block_16_ffn_rmsnorm_maxval_keep_dims_0, x = block_16_ffn_rmsnorm_abs)[name = string("block_16_ffn_rmsnorm_maxval")]; fp16 block_16_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_16_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_16_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_16_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_16_ffn_rmsnorm_maxval_clipped = clip(alpha = block_16_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_16_ffn_rmsnorm_maxval_clipped_beta_0, x = block_16_ffn_rmsnorm_maxval)[name = string("block_16_ffn_rmsnorm_maxval_clipped")]; tensor block_16_ffn_rmsnorm_scaled = real_div(x = block_16_residual_1, y = block_16_ffn_rmsnorm_maxval_clipped)[name = string("block_16_ffn_rmsnorm_scaled")]; tensor block_16_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_16_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_16_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_16_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_16_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_16_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_16_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_16_ffn_rmsnorm_scaled)[name = string("block_16_ffn_rmsnorm_squared_sum")]; fp16 block_16_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_16_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_16_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_16_ffn_rmsnorm_rsqrt_epsilon_0, x = block_16_ffn_rmsnorm_squared_sum)[name = string("block_16_ffn_rmsnorm_rsqrt")]; fp16 block_16_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_16_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_16_ffn_rmsnorm_dim_scaled = mul(x = block_16_ffn_rmsnorm_scaled, y = block_16_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_16_ffn_rmsnorm_dim_scaled")]; tensor block_16_ffn_rmsnorm_normalized = mul(x = block_16_ffn_rmsnorm_dim_scaled, y = block_16_ffn_rmsnorm_rsqrt)[name = string("block_16_ffn_rmsnorm_normalized")]; tensor block_16_ffn_rmsnorm_y_0 = const()[name = string("block_16_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478500288)))]; tensor block_16_ffn_rmsnorm = mul(x = block_16_ffn_rmsnorm_normalized, y = block_16_ffn_rmsnorm_y_0)[name = string("block_16_ffn_rmsnorm")]; tensor block_16_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478502144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481770816))))[name = string("block_16_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_77 = 
constexpr_blockwise_shift_scale(data = block_16_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481926528))))[name = string("constexpr_blockwise_shift_scale_77")]; tensor block_16_ffn_inproj_strides_0 = const()[name = string("block_16_ffn_inproj_strides_0"), val = tensor([1])]; string block_16_ffn_inproj_pad_type_0 = const()[name = string("block_16_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_16_ffn_inproj_pad_0 = const()[name = string("block_16_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_16_ffn_inproj_dilations_0 = const()[name = string("block_16_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_16_ffn_inproj_groups_0 = const()[name = string("block_16_ffn_inproj_groups_0"), val = int32(1)]; tensor block_16_ffn_inproj = conv(dilations = block_16_ffn_inproj_dilations_0, groups = block_16_ffn_inproj_groups_0, pad = block_16_ffn_inproj_pad_0, pad_type = block_16_ffn_inproj_pad_type_0, strides = block_16_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_77, x = block_16_ffn_rmsnorm)[name = string("block_16_ffn_inproj")]; tensor block_16_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481936320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485204992))))[name = string("block_16_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_78 = constexpr_blockwise_shift_scale(data = block_16_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485360704))))[name = string("constexpr_blockwise_shift_scale_78")]; tensor block_16_ffn_g_strides_0 = const()[name = string("block_16_ffn_g_strides_0"), val = tensor([1])]; string block_16_ffn_g_pad_type_0 = const()[name = string("block_16_ffn_g_pad_type_0"), val = string("valid")]; tensor block_16_ffn_g_pad_0 = const()[name = string("block_16_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_16_ffn_g_dilations_0 = const()[name = string("block_16_ffn_g_dilations_0"), val = tensor([1])]; int32 block_16_ffn_g_groups_0 = const()[name = string("block_16_ffn_g_groups_0"), val = int32(1)]; tensor block_16_ffn_g = conv(dilations = block_16_ffn_g_dilations_0, groups = block_16_ffn_g_groups_0, pad = block_16_ffn_g_pad_0, pad_type = block_16_ffn_g_pad_type_0, strides = block_16_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_78, x = block_16_ffn_rmsnorm)[name = string("block_16_ffn_g")]; tensor block_16_ffn_g_activation = silu(x = block_16_ffn_g)[name = string("block_16_ffn_g_activation")]; tensor block_16_ffn_x_gated = mul(x = block_16_ffn_inproj, y = block_16_ffn_g_activation)[name = string("block_16_ffn_x_gated")]; tensor block_16_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485370496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488639168))))[name = string("block_16_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_79 = constexpr_blockwise_shift_scale(data = block_16_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488667904))))[name = string("constexpr_blockwise_shift_scale_79")]; tensor block_16_ffn_outproj_strides_0 = const()[name = 
string("block_16_ffn_outproj_strides_0"), val = tensor([1])]; string block_16_ffn_outproj_pad_type_0 = const()[name = string("block_16_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_16_ffn_outproj_pad_0 = const()[name = string("block_16_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_16_ffn_outproj_dilations_0 = const()[name = string("block_16_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_16_ffn_outproj_groups_0 = const()[name = string("block_16_ffn_outproj_groups_0"), val = int32(1)]; tensor block_16_ffn_outproj = conv(dilations = block_16_ffn_outproj_dilations_0, groups = block_16_ffn_outproj_groups_0, pad = block_16_ffn_outproj_pad_0, pad_type = block_16_ffn_outproj_pad_type_0, strides = block_16_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_79, x = block_16_ffn_x_gated)[name = string("block_16_ffn_outproj")]; tensor block_16_residual_2 = add(x = block_16_ffn_outproj, y = block_16_residual_1)[name = string("block_16_residual_2")]; tensor block_17_attention_rmsnorm_abs = abs(x = block_16_residual_2)[name = string("block_17_attention_rmsnorm_abs")]; tensor block_17_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_17_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_17_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_17_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_17_attention_rmsnorm_maxval = reduce_max(axes = block_17_attention_rmsnorm_maxval_axes_0, keep_dims = block_17_attention_rmsnorm_maxval_keep_dims_0, x = block_17_attention_rmsnorm_abs)[name = string("block_17_attention_rmsnorm_maxval")]; fp16 block_17_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_17_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_17_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_17_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_17_attention_rmsnorm_maxval_clipped = clip(alpha = block_17_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_17_attention_rmsnorm_maxval_clipped_beta_0, x = block_17_attention_rmsnorm_maxval)[name = string("block_17_attention_rmsnorm_maxval_clipped")]; tensor block_17_attention_rmsnorm_scaled = real_div(x = block_16_residual_2, y = block_17_attention_rmsnorm_maxval_clipped)[name = string("block_17_attention_rmsnorm_scaled")]; tensor block_17_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_17_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_17_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_17_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_17_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_17_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_17_attention_rmsnorm_squared_sum_keep_dims_0, x = block_17_attention_rmsnorm_scaled)[name = string("block_17_attention_rmsnorm_squared_sum")]; fp16 block_17_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_17_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_17_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_17_attention_rmsnorm_rsqrt_epsilon_0, x = block_17_attention_rmsnorm_squared_sum)[name = string("block_17_attention_rmsnorm_rsqrt")]; fp16 block_17_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_17_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_17_attention_rmsnorm_dim_scaled = mul(x = 
block_17_attention_rmsnorm_scaled, y = block_17_attention_rmsnorm_dim_scaled_y_0)[name = string("block_17_attention_rmsnorm_dim_scaled")]; tensor block_17_attention_rmsnorm_normalized = mul(x = block_17_attention_rmsnorm_dim_scaled, y = block_17_attention_rmsnorm_rsqrt)[name = string("block_17_attention_rmsnorm_normalized")]; tensor block_17_attention_rmsnorm_y_0 = const()[name = string("block_17_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488669760)))]; tensor block_17_attention_rmsnorm = mul(x = block_17_attention_rmsnorm_normalized, y = block_17_attention_rmsnorm_y_0)[name = string("block_17_attention_rmsnorm")]; tensor attention_17_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488671616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489445824))))[name = string("attention_17_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_80 = constexpr_blockwise_shift_scale(data = attention_17_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489482752))))[name = string("constexpr_blockwise_shift_scale_80")]; tensor attention_17_qkvproj_bias_0 = const()[name = string("attention_17_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489485120)))]; tensor attention_17_qkvproj_strides_0 = const()[name = string("attention_17_qkvproj_strides_0"), val = tensor([1])]; string attention_17_qkvproj_pad_type_0 = const()[name = string("attention_17_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_17_qkvproj_pad_0 = const()[name = string("attention_17_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_17_qkvproj_dilations_0 = const()[name = string("attention_17_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_17_qkvproj_groups_0 = const()[name = string("attention_17_qkvproj_groups_0"), val = int32(1)]; tensor attention_17_qkvproj = conv(bias = attention_17_qkvproj_bias_0, dilations = attention_17_qkvproj_dilations_0, groups = attention_17_qkvproj_groups_0, pad = attention_17_qkvproj_pad_0, pad_type = attention_17_qkvproj_pad_type_0, strides = attention_17_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_80, x = block_17_attention_rmsnorm)[name = string("attention_17_qkvproj")]; tensor attention_17_head_reshape_shape_0 = const()[name = string("attention_17_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_17_head_reshape = reshape(shape = attention_17_head_reshape_shape_0, x = attention_17_qkvproj)[name = string("attention_17_head_reshape")]; tensor attention_17_head_transpose_perm_0 = const()[name = string("attention_17_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_17_split_qkv_heads_axis_0 = const()[name = string("attention_17_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_17_split_qkv_heads_split_sizes_0 = const()[name = string("attention_17_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_17_head_transpose = transpose(perm = attention_17_head_transpose_perm_0, x = attention_17_head_reshape)[name = string("transpose_14")]; tensor attention_17_split_qkv_heads_0, tensor attention_17_split_qkv_heads_1, tensor attention_17_split_qkv_heads_2 = split(axis = attention_17_split_qkv_heads_axis_0, split_sizes = 
attention_17_split_qkv_heads_split_sizes_0, x = attention_17_head_transpose)[name = string("attention_17_split_qkv_heads")]; tensor attention_17_q_rope_lhs_mult = mul(x = attention_17_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_17_q_rope_lhs_mult")]; int32 attention_17_q_rotate_half_split_num_splits_0 = const()[name = string("attention_17_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_17_q_rotate_half_split_axis_0 = const()[name = string("attention_17_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_17_q_rotate_half_split_0, tensor attention_17_q_rotate_half_split_1 = split(axis = attention_17_q_rotate_half_split_axis_0, num_splits = attention_17_q_rotate_half_split_num_splits_0, x = attention_17_split_qkv_heads_0)[name = string("attention_17_q_rotate_half_split")]; fp16 attention_17_q_rotate_half_neg_y_0 = const()[name = string("attention_17_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_17_q_rotate_half_neg = mul(x = attention_17_q_rotate_half_split_1, y = attention_17_q_rotate_half_neg_y_0)[name = string("attention_17_q_rotate_half_neg")]; int32 attention_17_q_rotate_half_concat_axis_0 = const()[name = string("attention_17_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_17_q_rotate_half_concat_interleave_0 = const()[name = string("attention_17_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_17_q_rotate_half_concat = concat(axis = attention_17_q_rotate_half_concat_axis_0, interleave = attention_17_q_rotate_half_concat_interleave_0, values = (attention_17_q_rotate_half_neg, attention_17_q_rotate_half_split_0))[name = string("attention_17_q_rotate_half_concat")]; tensor attention_17_q_rope_rhs_mult = mul(x = attention_17_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_17_q_rope_rhs_mult")]; tensor attention_17_q_rope = add(x = attention_17_q_rope_lhs_mult, y = attention_17_q_rope_rhs_mult)[name = string("attention_17_q_rope")]; tensor attention_17_k_rope_lhs_mult = mul(x = attention_17_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_17_k_rope_lhs_mult")]; int32 attention_17_k_rotate_half_split_num_splits_0 = const()[name = string("attention_17_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_17_k_rotate_half_split_axis_0 = const()[name = string("attention_17_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_17_k_rotate_half_split_0, tensor attention_17_k_rotate_half_split_1 = split(axis = attention_17_k_rotate_half_split_axis_0, num_splits = attention_17_k_rotate_half_split_num_splits_0, x = attention_17_split_qkv_heads_1)[name = string("attention_17_k_rotate_half_split")]; fp16 attention_17_k_rotate_half_neg_y_0 = const()[name = string("attention_17_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_17_k_rotate_half_neg = mul(x = attention_17_k_rotate_half_split_1, y = attention_17_k_rotate_half_neg_y_0)[name = string("attention_17_k_rotate_half_neg")]; int32 attention_17_k_rotate_half_concat_axis_0 = const()[name = string("attention_17_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_17_k_rotate_half_concat_interleave_0 = const()[name = string("attention_17_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_17_k_rotate_half_concat = concat(axis = attention_17_k_rotate_half_concat_axis_0, interleave = attention_17_k_rotate_half_concat_interleave_0, values = (attention_17_k_rotate_half_neg, attention_17_k_rotate_half_split_0))[name = 
string("attention_17_k_rotate_half_concat")]; tensor attention_17_k_rope_rhs_mult = mul(x = attention_17_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_17_k_rope_rhs_mult")]; tensor attention_17_k_rope = add(x = attention_17_k_rope_lhs_mult, y = attention_17_k_rope_rhs_mult)[name = string("attention_17_k_rope")]; int32 attention_17_q_splits_axis_0 = const()[name = string("attention_17_q_splits_axis_0"), val = int32(1)]; int32 attention_17_q_splits_num_splits_0 = const()[name = string("attention_17_q_splits_num_splits_0"), val = int32(2)]; tensor attention_17_q_splits_0, tensor attention_17_q_splits_1 = split(axis = attention_17_q_splits_axis_0, num_splits = attention_17_q_splits_num_splits_0, x = attention_17_q_rope)[name = string("attention_17_q_splits")]; tensor attention_17_update_begin_0_values0_0 = const()[name = string("attention_17_update_begin_0_values0_0"), val = tensor([17])]; tensor attention_17_update_begin_0_values1_0 = const()[name = string("attention_17_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_17_update_begin_0_values3_0 = const()[name = string("attention_17_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_17_update_begin_0_axis_0 = const()[name = string("attention_17_update_begin_0_axis_0"), val = int32(0)]; bool attention_17_update_begin_0_interleave_0 = const()[name = string("attention_17_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_17_update_begin_0 = concat(axis = attention_17_update_begin_0_axis_0, interleave = attention_17_update_begin_0_interleave_0, values = (attention_17_update_begin_0_values0_0, attention_17_update_begin_0_values1_0, query_pos1, attention_17_update_begin_0_values3_0))[name = string("attention_17_update_begin_0")]; tensor attention_17_update_end_0_values0_0 = const()[name = string("attention_17_update_end_0_values0_0"), val = tensor([18])]; tensor attention_17_update_end_0_values1_0 = const()[name = string("attention_17_update_end_0_values1_0"), val = tensor([2])]; tensor attention_17_update_end_0_values3_0 = const()[name = string("attention_17_update_end_0_values3_0"), val = tensor([64])]; int32 attention_17_update_end_0_axis_0 = const()[name = string("attention_17_update_end_0_axis_0"), val = int32(0)]; bool attention_17_update_end_0_interleave_0 = const()[name = string("attention_17_update_end_0_interleave_0"), val = bool(false)]; tensor attention_17_update_end_0 = concat(axis = attention_17_update_end_0_axis_0, interleave = attention_17_update_end_0_interleave_0, values = (attention_17_update_end_0_values0_0, attention_17_update_end_0_values1_0, end_pos_0, attention_17_update_end_0_values3_0))[name = string("attention_17_update_end_0")]; tensor attention_17_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_17_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_17_updated_key_cache_0 = slice_update(begin = attention_17_update_begin_0, end = attention_17_update_end_0, squeeze_mask = attention_17_updated_key_cache_0_squeeze_mask_0, update = attention_17_k_rope, x = coreml_update_state_32)[name = string("attention_17_updated_key_cache_0")]; write_state(data = attention_17_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_34_write_state")]; tensor coreml_update_state_34 = read_state(input = key_cache_state)[name = string("coreml_update_state_34")]; tensor attention_17_key_cache_begin_0 = const()[name = string("attention_17_key_cache_begin_0"), val = tensor([17, 0, 0, 0])]; 
tensor attention_17_key_cache_end_0 = const()[name = string("attention_17_key_cache_end_0"), val = tensor([18, 2, 512, 64])]; tensor attention_17_key_cache_squeeze_mask_0 = const()[name = string("attention_17_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_17_key_cache = slice_by_index(begin = attention_17_key_cache_begin_0, end = attention_17_key_cache_end_0, squeeze_mask = attention_17_key_cache_squeeze_mask_0, x = coreml_update_state_34)[name = string("attention_17_key_cache")]; int32 attention_17_key_cache_head_axis_0 = const()[name = string("attention_17_key_cache_head_axis_0"), val = int32(1)]; int32 attention_17_key_cache_head_num_splits_0 = const()[name = string("attention_17_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_17_key_cache_head_0, tensor attention_17_key_cache_head_1 = split(axis = attention_17_key_cache_head_axis_0, num_splits = attention_17_key_cache_head_num_splits_0, x = attention_17_key_cache)[name = string("attention_17_key_cache_head")]; tensor attention_17_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_17_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_17_updated_value_cache_0 = slice_update(begin = attention_17_update_begin_0, end = attention_17_update_end_0, squeeze_mask = attention_17_updated_value_cache_0_squeeze_mask_0, update = attention_17_split_qkv_heads_2, x = coreml_update_state_33)[name = string("attention_17_updated_value_cache_0")]; write_state(data = attention_17_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_35_write_state")]; tensor coreml_update_state_35 = read_state(input = value_cache_state)[name = string("coreml_update_state_35")]; tensor attention_17_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_17_slice_current_layer_value_cache_begin_0"), val = tensor([17, 0, 0, 0])]; tensor attention_17_slice_current_layer_value_cache_end_0 = const()[name = string("attention_17_slice_current_layer_value_cache_end_0"), val = tensor([18, 2, 512, 64])]; tensor attention_17_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_17_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_17_slice_current_layer_value_cache = slice_by_index(begin = attention_17_slice_current_layer_value_cache_begin_0, end = attention_17_slice_current_layer_value_cache_end_0, squeeze_mask = attention_17_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_35)[name = string("attention_17_slice_current_layer_value_cache")]; int32 attention_17_slice_value_cache_heads_axis_0 = const()[name = string("attention_17_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_17_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_17_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_17_slice_value_cache_heads_0, tensor attention_17_slice_value_cache_heads_1 = split(axis = attention_17_slice_value_cache_heads_axis_0, num_splits = attention_17_slice_value_cache_heads_num_splits_0, x = attention_17_slice_current_layer_value_cache)[name = string("attention_17_slice_value_cache_heads")]; bool attention_17_scores_0_transpose_y_0 = const()[name = string("attention_17_scores_0_transpose_y_0"), val = bool(true)]; bool attention_17_scores_0_transpose_x_0 = const()[name = string("attention_17_scores_0_transpose_x_0"), val = bool(false)]; tensor 
attention_17_scores_0 = matmul(transpose_x = attention_17_scores_0_transpose_x_0, transpose_y = attention_17_scores_0_transpose_y_0, x = attention_17_key_cache_head_0, y = attention_17_q_splits_0)[name = string("attention_17_scores_0")]; fp16 attention_17_scaled_scores_0_y_0 = const()[name = string("attention_17_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_17_scaled_scores_0 = mul(x = attention_17_scores_0, y = attention_17_scaled_scores_0_y_0)[name = string("attention_17_scaled_scores_0")]; tensor attention_17_masked_scaled_scores_0 = add(x = attention_17_scaled_scores_0, y = transpose_0)[name = string("attention_17_masked_scaled_scores_0")]; int32 softmax_34_axis_0 = const()[name = string("softmax_34_axis_0"), val = int32(-2)]; tensor softmax_34 = softmax(axis = softmax_34_axis_0, x = attention_17_masked_scaled_scores_0)[name = string("softmax_34")]; bool attention_17_attention_0_transpose_x_0 = const()[name = string("attention_17_attention_0_transpose_x_0"), val = bool(true)]; bool attention_17_attention_0_transpose_y_0 = const()[name = string("attention_17_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_17_attention_0 = matmul(transpose_x = attention_17_attention_0_transpose_x_0, transpose_y = attention_17_attention_0_transpose_y_0, x = softmax_34, y = attention_17_slice_value_cache_heads_0)[name = string("attention_17_attention_0")]; bool attention_17_scores_1_transpose_y_0 = const()[name = string("attention_17_scores_1_transpose_y_0"), val = bool(true)]; bool attention_17_scores_1_transpose_x_0 = const()[name = string("attention_17_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_17_scores_1 = matmul(transpose_x = attention_17_scores_1_transpose_x_0, transpose_y = attention_17_scores_1_transpose_y_0, x = attention_17_key_cache_head_1, y = attention_17_q_splits_1)[name = string("attention_17_scores_1")]; fp16 attention_17_scaled_scores_1_y_0 = const()[name = string("attention_17_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_17_scaled_scores_1 = mul(x = attention_17_scores_1, y = attention_17_scaled_scores_1_y_0)[name = string("attention_17_scaled_scores_1")]; tensor attention_17_masked_scaled_scores_1 = add(x = attention_17_scaled_scores_1, y = transpose_0)[name = string("attention_17_masked_scaled_scores_1")]; int32 softmax_35_axis_0 = const()[name = string("softmax_35_axis_0"), val = int32(-2)]; tensor softmax_35 = softmax(axis = softmax_35_axis_0, x = attention_17_masked_scaled_scores_1)[name = string("softmax_35")]; bool attention_17_attention_1_transpose_x_0 = const()[name = string("attention_17_attention_1_transpose_x_0"), val = bool(true)]; bool attention_17_attention_1_transpose_y_0 = const()[name = string("attention_17_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_17_attention_1 = matmul(transpose_x = attention_17_attention_1_transpose_x_0, transpose_y = attention_17_attention_1_transpose_y_0, x = softmax_35, y = attention_17_slice_value_cache_heads_1)[name = string("attention_17_attention_1")]; int32 attention_17_concat_attention_all_heads_axis_0 = const()[name = string("attention_17_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_17_concat_attention_all_heads_interleave_0 = const()[name = string("attention_17_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_17_concat_attention_all_heads = concat(axis = attention_17_concat_attention_all_heads_axis_0, interleave = attention_17_concat_attention_all_heads_interleave_0, values = 
(attention_17_attention_0, attention_17_attention_1))[name = string("attention_17_concat_attention_all_heads")]; tensor attention_17_channels_first_retransposed_perm_0 = const()[name = string("attention_17_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_17_reshaped_shape_0 = const()[name = string("attention_17_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_17_channels_first_retransposed = transpose(perm = attention_17_channels_first_retransposed_perm_0, x = attention_17_concat_attention_all_heads)[name = string("transpose_13")]; tensor attention_17_reshaped = reshape(shape = attention_17_reshaped_shape_0, x = attention_17_channels_first_retransposed)[name = string("attention_17_reshaped")]; tensor attention_17_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489487488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490089664))))[name = string("attention_17_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_81 = constexpr_blockwise_shift_scale(data = attention_17_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490118400))))[name = string("constexpr_blockwise_shift_scale_81")]; tensor attention_17_outproj_strides_0 = const()[name = string("attention_17_outproj_strides_0"), val = tensor([1])]; string attention_17_outproj_pad_type_0 = const()[name = string("attention_17_outproj_pad_type_0"), val = string("valid")]; tensor attention_17_outproj_pad_0 = const()[name = string("attention_17_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_17_outproj_dilations_0 = const()[name = string("attention_17_outproj_dilations_0"), val = tensor([1])]; int32 attention_17_outproj_groups_0 = const()[name = string("attention_17_outproj_groups_0"), val = int32(1)]; tensor attention_17_outproj = conv(dilations = attention_17_outproj_dilations_0, groups = attention_17_outproj_groups_0, pad = attention_17_outproj_pad_0, pad_type = attention_17_outproj_pad_type_0, strides = attention_17_outproj_strides_0, weight = constexpr_blockwise_shift_scale_81, x = attention_17_reshaped)[name = string("attention_17_outproj")]; tensor block_17_residual_1 = add(x = block_16_residual_2, y = attention_17_outproj)[name = string("block_17_residual_1")]; tensor block_17_ffn_rmsnorm_abs = abs(x = block_17_residual_1)[name = string("block_17_ffn_rmsnorm_abs")]; tensor block_17_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_17_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_17_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_17_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_17_ffn_rmsnorm_maxval = reduce_max(axes = block_17_ffn_rmsnorm_maxval_axes_0, keep_dims = block_17_ffn_rmsnorm_maxval_keep_dims_0, x = block_17_ffn_rmsnorm_abs)[name = string("block_17_ffn_rmsnorm_maxval")]; fp16 block_17_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_17_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_17_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_17_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_17_ffn_rmsnorm_maxval_clipped = clip(alpha = block_17_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_17_ffn_rmsnorm_maxval_clipped_beta_0, x = block_17_ffn_rmsnorm_maxval)[name = string("block_17_ffn_rmsnorm_maxval_clipped")]; tensor 
block_17_ffn_rmsnorm_scaled = real_div(x = block_17_residual_1, y = block_17_ffn_rmsnorm_maxval_clipped)[name = string("block_17_ffn_rmsnorm_scaled")]; tensor block_17_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_17_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_17_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_17_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_17_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_17_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_17_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_17_ffn_rmsnorm_scaled)[name = string("block_17_ffn_rmsnorm_squared_sum")]; fp16 block_17_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_17_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_17_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_17_ffn_rmsnorm_rsqrt_epsilon_0, x = block_17_ffn_rmsnorm_squared_sum)[name = string("block_17_ffn_rmsnorm_rsqrt")]; fp16 block_17_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_17_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_17_ffn_rmsnorm_dim_scaled = mul(x = block_17_ffn_rmsnorm_scaled, y = block_17_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_17_ffn_rmsnorm_dim_scaled")]; tensor block_17_ffn_rmsnorm_normalized = mul(x = block_17_ffn_rmsnorm_dim_scaled, y = block_17_ffn_rmsnorm_rsqrt)[name = string("block_17_ffn_rmsnorm_normalized")]; tensor block_17_ffn_rmsnorm_y_0 = const()[name = string("block_17_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490120256)))]; tensor block_17_ffn_rmsnorm = mul(x = block_17_ffn_rmsnorm_normalized, y = block_17_ffn_rmsnorm_y_0)[name = string("block_17_ffn_rmsnorm")]; tensor block_17_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490122112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493390784))))[name = string("block_17_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_82 = constexpr_blockwise_shift_scale(data = block_17_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493546496))))[name = string("constexpr_blockwise_shift_scale_82")]; tensor block_17_ffn_inproj_strides_0 = const()[name = string("block_17_ffn_inproj_strides_0"), val = tensor([1])]; string block_17_ffn_inproj_pad_type_0 = const()[name = string("block_17_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_17_ffn_inproj_pad_0 = const()[name = string("block_17_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_17_ffn_inproj_dilations_0 = const()[name = string("block_17_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_17_ffn_inproj_groups_0 = const()[name = string("block_17_ffn_inproj_groups_0"), val = int32(1)]; tensor block_17_ffn_inproj = conv(dilations = block_17_ffn_inproj_dilations_0, groups = block_17_ffn_inproj_groups_0, pad = block_17_ffn_inproj_pad_0, pad_type = block_17_ffn_inproj_pad_type_0, strides = block_17_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_82, x = block_17_ffn_rmsnorm)[name = string("block_17_ffn_inproj")]; tensor block_17_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493556288))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(496824960))))[name = string("block_17_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_83 = constexpr_blockwise_shift_scale(data = block_17_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496980672))))[name = string("constexpr_blockwise_shift_scale_83")]; tensor block_17_ffn_g_strides_0 = const()[name = string("block_17_ffn_g_strides_0"), val = tensor([1])]; string block_17_ffn_g_pad_type_0 = const()[name = string("block_17_ffn_g_pad_type_0"), val = string("valid")]; tensor block_17_ffn_g_pad_0 = const()[name = string("block_17_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_17_ffn_g_dilations_0 = const()[name = string("block_17_ffn_g_dilations_0"), val = tensor([1])]; int32 block_17_ffn_g_groups_0 = const()[name = string("block_17_ffn_g_groups_0"), val = int32(1)]; tensor block_17_ffn_g = conv(dilations = block_17_ffn_g_dilations_0, groups = block_17_ffn_g_groups_0, pad = block_17_ffn_g_pad_0, pad_type = block_17_ffn_g_pad_type_0, strides = block_17_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_83, x = block_17_ffn_rmsnorm)[name = string("block_17_ffn_g")]; tensor block_17_ffn_g_activation = silu(x = block_17_ffn_g)[name = string("block_17_ffn_g_activation")]; tensor block_17_ffn_x_gated = mul(x = block_17_ffn_inproj, y = block_17_ffn_g_activation)[name = string("block_17_ffn_x_gated")]; tensor block_17_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496990464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500259136))))[name = string("block_17_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_84 = constexpr_blockwise_shift_scale(data = block_17_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500287872))))[name = string("constexpr_blockwise_shift_scale_84")]; tensor block_17_ffn_outproj_strides_0 = const()[name = string("block_17_ffn_outproj_strides_0"), val = tensor([1])]; string block_17_ffn_outproj_pad_type_0 = const()[name = string("block_17_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_17_ffn_outproj_pad_0 = const()[name = string("block_17_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_17_ffn_outproj_dilations_0 = const()[name = string("block_17_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_17_ffn_outproj_groups_0 = const()[name = string("block_17_ffn_outproj_groups_0"), val = int32(1)]; tensor block_17_ffn_outproj = conv(dilations = block_17_ffn_outproj_dilations_0, groups = block_17_ffn_outproj_groups_0, pad = block_17_ffn_outproj_pad_0, pad_type = block_17_ffn_outproj_pad_type_0, strides = block_17_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_84, x = block_17_ffn_x_gated)[name = string("block_17_ffn_outproj")]; tensor block_17_residual_2 = add(x = block_17_ffn_outproj, y = block_17_residual_1)[name = string("block_17_residual_2")]; tensor block_18_attention_rmsnorm_abs = abs(x = block_17_residual_2)[name = string("block_18_attention_rmsnorm_abs")]; tensor block_18_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_18_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_18_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_18_attention_rmsnorm_maxval_keep_dims_0"), val = 
bool(true)]; tensor block_18_attention_rmsnorm_maxval = reduce_max(axes = block_18_attention_rmsnorm_maxval_axes_0, keep_dims = block_18_attention_rmsnorm_maxval_keep_dims_0, x = block_18_attention_rmsnorm_abs)[name = string("block_18_attention_rmsnorm_maxval")]; fp16 block_18_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_18_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_18_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_18_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_18_attention_rmsnorm_maxval_clipped = clip(alpha = block_18_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_18_attention_rmsnorm_maxval_clipped_beta_0, x = block_18_attention_rmsnorm_maxval)[name = string("block_18_attention_rmsnorm_maxval_clipped")]; tensor block_18_attention_rmsnorm_scaled = real_div(x = block_17_residual_2, y = block_18_attention_rmsnorm_maxval_clipped)[name = string("block_18_attention_rmsnorm_scaled")]; tensor block_18_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_18_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_18_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_18_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_18_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_18_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_18_attention_rmsnorm_squared_sum_keep_dims_0, x = block_18_attention_rmsnorm_scaled)[name = string("block_18_attention_rmsnorm_squared_sum")]; fp16 block_18_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_18_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_18_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_18_attention_rmsnorm_rsqrt_epsilon_0, x = block_18_attention_rmsnorm_squared_sum)[name = string("block_18_attention_rmsnorm_rsqrt")]; fp16 block_18_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_18_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_18_attention_rmsnorm_dim_scaled = mul(x = block_18_attention_rmsnorm_scaled, y = block_18_attention_rmsnorm_dim_scaled_y_0)[name = string("block_18_attention_rmsnorm_dim_scaled")]; tensor block_18_attention_rmsnorm_normalized = mul(x = block_18_attention_rmsnorm_dim_scaled, y = block_18_attention_rmsnorm_rsqrt)[name = string("block_18_attention_rmsnorm_normalized")]; tensor block_18_attention_rmsnorm_y_0 = const()[name = string("block_18_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500289728)))]; tensor block_18_attention_rmsnorm = mul(x = block_18_attention_rmsnorm_normalized, y = block_18_attention_rmsnorm_y_0)[name = string("block_18_attention_rmsnorm")]; tensor attention_18_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500291584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501065792))))[name = string("attention_18_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_85 = constexpr_blockwise_shift_scale(data = attention_18_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501102720))))[name = string("constexpr_blockwise_shift_scale_85")]; tensor attention_18_qkvproj_bias_0 = const()[name = string("attention_18_qkvproj_bias_0"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501105088)))]; tensor attention_18_qkvproj_strides_0 = const()[name = string("attention_18_qkvproj_strides_0"), val = tensor([1])]; string attention_18_qkvproj_pad_type_0 = const()[name = string("attention_18_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_18_qkvproj_pad_0 = const()[name = string("attention_18_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_18_qkvproj_dilations_0 = const()[name = string("attention_18_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_18_qkvproj_groups_0 = const()[name = string("attention_18_qkvproj_groups_0"), val = int32(1)]; tensor attention_18_qkvproj = conv(bias = attention_18_qkvproj_bias_0, dilations = attention_18_qkvproj_dilations_0, groups = attention_18_qkvproj_groups_0, pad = attention_18_qkvproj_pad_0, pad_type = attention_18_qkvproj_pad_type_0, strides = attention_18_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_85, x = block_18_attention_rmsnorm)[name = string("attention_18_qkvproj")]; tensor attention_18_head_reshape_shape_0 = const()[name = string("attention_18_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_18_head_reshape = reshape(shape = attention_18_head_reshape_shape_0, x = attention_18_qkvproj)[name = string("attention_18_head_reshape")]; tensor attention_18_head_transpose_perm_0 = const()[name = string("attention_18_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_18_split_qkv_heads_axis_0 = const()[name = string("attention_18_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_18_split_qkv_heads_split_sizes_0 = const()[name = string("attention_18_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_18_head_transpose = transpose(perm = attention_18_head_transpose_perm_0, x = attention_18_head_reshape)[name = string("transpose_12")]; tensor attention_18_split_qkv_heads_0, tensor attention_18_split_qkv_heads_1, tensor attention_18_split_qkv_heads_2 = split(axis = attention_18_split_qkv_heads_axis_0, split_sizes = attention_18_split_qkv_heads_split_sizes_0, x = attention_18_head_transpose)[name = string("attention_18_split_qkv_heads")]; tensor attention_18_q_rope_lhs_mult = mul(x = attention_18_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_18_q_rope_lhs_mult")]; int32 attention_18_q_rotate_half_split_num_splits_0 = const()[name = string("attention_18_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_18_q_rotate_half_split_axis_0 = const()[name = string("attention_18_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_18_q_rotate_half_split_0, tensor attention_18_q_rotate_half_split_1 = split(axis = attention_18_q_rotate_half_split_axis_0, num_splits = attention_18_q_rotate_half_split_num_splits_0, x = attention_18_split_qkv_heads_0)[name = string("attention_18_q_rotate_half_split")]; fp16 attention_18_q_rotate_half_neg_y_0 = const()[name = string("attention_18_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_18_q_rotate_half_neg = mul(x = attention_18_q_rotate_half_split_1, y = attention_18_q_rotate_half_neg_y_0)[name = string("attention_18_q_rotate_half_neg")]; int32 attention_18_q_rotate_half_concat_axis_0 = const()[name = string("attention_18_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_18_q_rotate_half_concat_interleave_0 = const()[name = string("attention_18_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor 
attention_18_q_rotate_half_concat = concat(axis = attention_18_q_rotate_half_concat_axis_0, interleave = attention_18_q_rotate_half_concat_interleave_0, values = (attention_18_q_rotate_half_neg, attention_18_q_rotate_half_split_0))[name = string("attention_18_q_rotate_half_concat")]; tensor attention_18_q_rope_rhs_mult = mul(x = attention_18_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_18_q_rope_rhs_mult")]; tensor attention_18_q_rope = add(x = attention_18_q_rope_lhs_mult, y = attention_18_q_rope_rhs_mult)[name = string("attention_18_q_rope")]; tensor attention_18_k_rope_lhs_mult = mul(x = attention_18_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_18_k_rope_lhs_mult")]; int32 attention_18_k_rotate_half_split_num_splits_0 = const()[name = string("attention_18_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_18_k_rotate_half_split_axis_0 = const()[name = string("attention_18_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_18_k_rotate_half_split_0, tensor attention_18_k_rotate_half_split_1 = split(axis = attention_18_k_rotate_half_split_axis_0, num_splits = attention_18_k_rotate_half_split_num_splits_0, x = attention_18_split_qkv_heads_1)[name = string("attention_18_k_rotate_half_split")]; fp16 attention_18_k_rotate_half_neg_y_0 = const()[name = string("attention_18_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_18_k_rotate_half_neg = mul(x = attention_18_k_rotate_half_split_1, y = attention_18_k_rotate_half_neg_y_0)[name = string("attention_18_k_rotate_half_neg")]; int32 attention_18_k_rotate_half_concat_axis_0 = const()[name = string("attention_18_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_18_k_rotate_half_concat_interleave_0 = const()[name = string("attention_18_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_18_k_rotate_half_concat = concat(axis = attention_18_k_rotate_half_concat_axis_0, interleave = attention_18_k_rotate_half_concat_interleave_0, values = (attention_18_k_rotate_half_neg, attention_18_k_rotate_half_split_0))[name = string("attention_18_k_rotate_half_concat")]; tensor attention_18_k_rope_rhs_mult = mul(x = attention_18_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_18_k_rope_rhs_mult")]; tensor attention_18_k_rope = add(x = attention_18_k_rope_lhs_mult, y = attention_18_k_rope_rhs_mult)[name = string("attention_18_k_rope")]; int32 attention_18_q_splits_axis_0 = const()[name = string("attention_18_q_splits_axis_0"), val = int32(1)]; int32 attention_18_q_splits_num_splits_0 = const()[name = string("attention_18_q_splits_num_splits_0"), val = int32(2)]; tensor attention_18_q_splits_0, tensor attention_18_q_splits_1 = split(axis = attention_18_q_splits_axis_0, num_splits = attention_18_q_splits_num_splits_0, x = attention_18_q_rope)[name = string("attention_18_q_splits")]; tensor attention_18_update_begin_0_values0_0 = const()[name = string("attention_18_update_begin_0_values0_0"), val = tensor([18])]; tensor attention_18_update_begin_0_values1_0 = const()[name = string("attention_18_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_18_update_begin_0_values3_0 = const()[name = string("attention_18_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_18_update_begin_0_axis_0 = const()[name = string("attention_18_update_begin_0_axis_0"), val = int32(0)]; bool attention_18_update_begin_0_interleave_0 = const()[name = string("attention_18_update_begin_0_interleave_0"), val = bool(false)]; tensor 
attention_18_update_begin_0 = concat(axis = attention_18_update_begin_0_axis_0, interleave = attention_18_update_begin_0_interleave_0, values = (attention_18_update_begin_0_values0_0, attention_18_update_begin_0_values1_0, query_pos1, attention_18_update_begin_0_values3_0))[name = string("attention_18_update_begin_0")]; tensor attention_18_update_end_0_values0_0 = const()[name = string("attention_18_update_end_0_values0_0"), val = tensor([19])]; tensor attention_18_update_end_0_values1_0 = const()[name = string("attention_18_update_end_0_values1_0"), val = tensor([2])]; tensor attention_18_update_end_0_values3_0 = const()[name = string("attention_18_update_end_0_values3_0"), val = tensor([64])]; int32 attention_18_update_end_0_axis_0 = const()[name = string("attention_18_update_end_0_axis_0"), val = int32(0)]; bool attention_18_update_end_0_interleave_0 = const()[name = string("attention_18_update_end_0_interleave_0"), val = bool(false)]; tensor attention_18_update_end_0 = concat(axis = attention_18_update_end_0_axis_0, interleave = attention_18_update_end_0_interleave_0, values = (attention_18_update_end_0_values0_0, attention_18_update_end_0_values1_0, end_pos_0, attention_18_update_end_0_values3_0))[name = string("attention_18_update_end_0")]; tensor attention_18_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_18_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_18_updated_key_cache_0 = slice_update(begin = attention_18_update_begin_0, end = attention_18_update_end_0, squeeze_mask = attention_18_updated_key_cache_0_squeeze_mask_0, update = attention_18_k_rope, x = coreml_update_state_34)[name = string("attention_18_updated_key_cache_0")]; write_state(data = attention_18_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_36_write_state")]; tensor coreml_update_state_36 = read_state(input = key_cache_state)[name = string("coreml_update_state_36")]; tensor attention_18_key_cache_begin_0 = const()[name = string("attention_18_key_cache_begin_0"), val = tensor([18, 0, 0, 0])]; tensor attention_18_key_cache_end_0 = const()[name = string("attention_18_key_cache_end_0"), val = tensor([19, 2, 512, 64])]; tensor attention_18_key_cache_squeeze_mask_0 = const()[name = string("attention_18_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_18_key_cache = slice_by_index(begin = attention_18_key_cache_begin_0, end = attention_18_key_cache_end_0, squeeze_mask = attention_18_key_cache_squeeze_mask_0, x = coreml_update_state_36)[name = string("attention_18_key_cache")]; int32 attention_18_key_cache_head_axis_0 = const()[name = string("attention_18_key_cache_head_axis_0"), val = int32(1)]; int32 attention_18_key_cache_head_num_splits_0 = const()[name = string("attention_18_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_18_key_cache_head_0, tensor attention_18_key_cache_head_1 = split(axis = attention_18_key_cache_head_axis_0, num_splits = attention_18_key_cache_head_num_splits_0, x = attention_18_key_cache)[name = string("attention_18_key_cache_head")]; tensor attention_18_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_18_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_18_updated_value_cache_0 = slice_update(begin = attention_18_update_begin_0, end = attention_18_update_end_0, squeeze_mask = attention_18_updated_value_cache_0_squeeze_mask_0, update = 
attention_18_split_qkv_heads_2, x = coreml_update_state_35)[name = string("attention_18_updated_value_cache_0")]; write_state(data = attention_18_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_37_write_state")]; tensor coreml_update_state_37 = read_state(input = value_cache_state)[name = string("coreml_update_state_37")]; tensor attention_18_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_18_slice_current_layer_value_cache_begin_0"), val = tensor([18, 0, 0, 0])]; tensor attention_18_slice_current_layer_value_cache_end_0 = const()[name = string("attention_18_slice_current_layer_value_cache_end_0"), val = tensor([19, 2, 512, 64])]; tensor attention_18_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_18_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_18_slice_current_layer_value_cache = slice_by_index(begin = attention_18_slice_current_layer_value_cache_begin_0, end = attention_18_slice_current_layer_value_cache_end_0, squeeze_mask = attention_18_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_37)[name = string("attention_18_slice_current_layer_value_cache")]; int32 attention_18_slice_value_cache_heads_axis_0 = const()[name = string("attention_18_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_18_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_18_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_18_slice_value_cache_heads_0, tensor attention_18_slice_value_cache_heads_1 = split(axis = attention_18_slice_value_cache_heads_axis_0, num_splits = attention_18_slice_value_cache_heads_num_splits_0, x = attention_18_slice_current_layer_value_cache)[name = string("attention_18_slice_value_cache_heads")]; bool attention_18_scores_0_transpose_y_0 = const()[name = string("attention_18_scores_0_transpose_y_0"), val = bool(true)]; bool attention_18_scores_0_transpose_x_0 = const()[name = string("attention_18_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_18_scores_0 = matmul(transpose_x = attention_18_scores_0_transpose_x_0, transpose_y = attention_18_scores_0_transpose_y_0, x = attention_18_key_cache_head_0, y = attention_18_q_splits_0)[name = string("attention_18_scores_0")]; fp16 attention_18_scaled_scores_0_y_0 = const()[name = string("attention_18_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_18_scaled_scores_0 = mul(x = attention_18_scores_0, y = attention_18_scaled_scores_0_y_0)[name = string("attention_18_scaled_scores_0")]; tensor attention_18_masked_scaled_scores_0 = add(x = attention_18_scaled_scores_0, y = transpose_0)[name = string("attention_18_masked_scaled_scores_0")]; int32 softmax_36_axis_0 = const()[name = string("softmax_36_axis_0"), val = int32(-2)]; tensor softmax_36 = softmax(axis = softmax_36_axis_0, x = attention_18_masked_scaled_scores_0)[name = string("softmax_36")]; bool attention_18_attention_0_transpose_x_0 = const()[name = string("attention_18_attention_0_transpose_x_0"), val = bool(true)]; bool attention_18_attention_0_transpose_y_0 = const()[name = string("attention_18_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_18_attention_0 = matmul(transpose_x = attention_18_attention_0_transpose_x_0, transpose_y = attention_18_attention_0_transpose_y_0, x = softmax_36, y = attention_18_slice_value_cache_heads_0)[name = string("attention_18_attention_0")]; bool 
attention_18_scores_1_transpose_y_0 = const()[name = string("attention_18_scores_1_transpose_y_0"), val = bool(true)]; bool attention_18_scores_1_transpose_x_0 = const()[name = string("attention_18_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_18_scores_1 = matmul(transpose_x = attention_18_scores_1_transpose_x_0, transpose_y = attention_18_scores_1_transpose_y_0, x = attention_18_key_cache_head_1, y = attention_18_q_splits_1)[name = string("attention_18_scores_1")]; fp16 attention_18_scaled_scores_1_y_0 = const()[name = string("attention_18_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_18_scaled_scores_1 = mul(x = attention_18_scores_1, y = attention_18_scaled_scores_1_y_0)[name = string("attention_18_scaled_scores_1")]; tensor attention_18_masked_scaled_scores_1 = add(x = attention_18_scaled_scores_1, y = transpose_0)[name = string("attention_18_masked_scaled_scores_1")]; int32 softmax_37_axis_0 = const()[name = string("softmax_37_axis_0"), val = int32(-2)]; tensor softmax_37 = softmax(axis = softmax_37_axis_0, x = attention_18_masked_scaled_scores_1)[name = string("softmax_37")]; bool attention_18_attention_1_transpose_x_0 = const()[name = string("attention_18_attention_1_transpose_x_0"), val = bool(true)]; bool attention_18_attention_1_transpose_y_0 = const()[name = string("attention_18_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_18_attention_1 = matmul(transpose_x = attention_18_attention_1_transpose_x_0, transpose_y = attention_18_attention_1_transpose_y_0, x = softmax_37, y = attention_18_slice_value_cache_heads_1)[name = string("attention_18_attention_1")]; int32 attention_18_concat_attention_all_heads_axis_0 = const()[name = string("attention_18_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_18_concat_attention_all_heads_interleave_0 = const()[name = string("attention_18_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_18_concat_attention_all_heads = concat(axis = attention_18_concat_attention_all_heads_axis_0, interleave = attention_18_concat_attention_all_heads_interleave_0, values = (attention_18_attention_0, attention_18_attention_1))[name = string("attention_18_concat_attention_all_heads")]; tensor attention_18_channels_first_retransposed_perm_0 = const()[name = string("attention_18_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_18_reshaped_shape_0 = const()[name = string("attention_18_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_18_channels_first_retransposed = transpose(perm = attention_18_channels_first_retransposed_perm_0, x = attention_18_concat_attention_all_heads)[name = string("transpose_11")]; tensor attention_18_reshaped = reshape(shape = attention_18_reshaped_shape_0, x = attention_18_channels_first_retransposed)[name = string("attention_18_reshaped")]; tensor attention_18_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501107456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501709632))))[name = string("attention_18_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_86 = constexpr_blockwise_shift_scale(data = attention_18_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501738368))))[name = string("constexpr_blockwise_shift_scale_86")]; tensor 
attention_18_outproj_strides_0 = const()[name = string("attention_18_outproj_strides_0"), val = tensor([1])]; string attention_18_outproj_pad_type_0 = const()[name = string("attention_18_outproj_pad_type_0"), val = string("valid")]; tensor attention_18_outproj_pad_0 = const()[name = string("attention_18_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_18_outproj_dilations_0 = const()[name = string("attention_18_outproj_dilations_0"), val = tensor([1])]; int32 attention_18_outproj_groups_0 = const()[name = string("attention_18_outproj_groups_0"), val = int32(1)]; tensor attention_18_outproj = conv(dilations = attention_18_outproj_dilations_0, groups = attention_18_outproj_groups_0, pad = attention_18_outproj_pad_0, pad_type = attention_18_outproj_pad_type_0, strides = attention_18_outproj_strides_0, weight = constexpr_blockwise_shift_scale_86, x = attention_18_reshaped)[name = string("attention_18_outproj")]; tensor block_18_residual_1 = add(x = block_17_residual_2, y = attention_18_outproj)[name = string("block_18_residual_1")]; tensor block_18_ffn_rmsnorm_abs = abs(x = block_18_residual_1)[name = string("block_18_ffn_rmsnorm_abs")]; tensor block_18_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_18_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_18_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_18_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_18_ffn_rmsnorm_maxval = reduce_max(axes = block_18_ffn_rmsnorm_maxval_axes_0, keep_dims = block_18_ffn_rmsnorm_maxval_keep_dims_0, x = block_18_ffn_rmsnorm_abs)[name = string("block_18_ffn_rmsnorm_maxval")]; fp16 block_18_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_18_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_18_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_18_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_18_ffn_rmsnorm_maxval_clipped = clip(alpha = block_18_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_18_ffn_rmsnorm_maxval_clipped_beta_0, x = block_18_ffn_rmsnorm_maxval)[name = string("block_18_ffn_rmsnorm_maxval_clipped")]; tensor block_18_ffn_rmsnorm_scaled = real_div(x = block_18_residual_1, y = block_18_ffn_rmsnorm_maxval_clipped)[name = string("block_18_ffn_rmsnorm_scaled")]; tensor block_18_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_18_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_18_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_18_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_18_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_18_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_18_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_18_ffn_rmsnorm_scaled)[name = string("block_18_ffn_rmsnorm_squared_sum")]; fp16 block_18_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_18_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_18_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_18_ffn_rmsnorm_rsqrt_epsilon_0, x = block_18_ffn_rmsnorm_squared_sum)[name = string("block_18_ffn_rmsnorm_rsqrt")]; fp16 block_18_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_18_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_18_ffn_rmsnorm_dim_scaled = mul(x = block_18_ffn_rmsnorm_scaled, y = block_18_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_18_ffn_rmsnorm_dim_scaled")]; tensor block_18_ffn_rmsnorm_normalized = mul(x = block_18_ffn_rmsnorm_dim_scaled, y = 
block_18_ffn_rmsnorm_rsqrt)[name = string("block_18_ffn_rmsnorm_normalized")]; tensor block_18_ffn_rmsnorm_y_0 = const()[name = string("block_18_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501740224)))]; tensor block_18_ffn_rmsnorm = mul(x = block_18_ffn_rmsnorm_normalized, y = block_18_ffn_rmsnorm_y_0)[name = string("block_18_ffn_rmsnorm")]; tensor block_18_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501742080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505010752))))[name = string("block_18_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_87 = constexpr_blockwise_shift_scale(data = block_18_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505166464))))[name = string("constexpr_blockwise_shift_scale_87")]; tensor block_18_ffn_inproj_strides_0 = const()[name = string("block_18_ffn_inproj_strides_0"), val = tensor([1])]; string block_18_ffn_inproj_pad_type_0 = const()[name = string("block_18_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_18_ffn_inproj_pad_0 = const()[name = string("block_18_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_18_ffn_inproj_dilations_0 = const()[name = string("block_18_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_18_ffn_inproj_groups_0 = const()[name = string("block_18_ffn_inproj_groups_0"), val = int32(1)]; tensor block_18_ffn_inproj = conv(dilations = block_18_ffn_inproj_dilations_0, groups = block_18_ffn_inproj_groups_0, pad = block_18_ffn_inproj_pad_0, pad_type = block_18_ffn_inproj_pad_type_0, strides = block_18_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_87, x = block_18_ffn_rmsnorm)[name = string("block_18_ffn_inproj")]; tensor block_18_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505176256))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508444928))))[name = string("block_18_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_88 = constexpr_blockwise_shift_scale(data = block_18_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508600640))))[name = string("constexpr_blockwise_shift_scale_88")]; tensor block_18_ffn_g_strides_0 = const()[name = string("block_18_ffn_g_strides_0"), val = tensor([1])]; string block_18_ffn_g_pad_type_0 = const()[name = string("block_18_ffn_g_pad_type_0"), val = string("valid")]; tensor block_18_ffn_g_pad_0 = const()[name = string("block_18_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_18_ffn_g_dilations_0 = const()[name = string("block_18_ffn_g_dilations_0"), val = tensor([1])]; int32 block_18_ffn_g_groups_0 = const()[name = string("block_18_ffn_g_groups_0"), val = int32(1)]; tensor block_18_ffn_g = conv(dilations = block_18_ffn_g_dilations_0, groups = block_18_ffn_g_groups_0, pad = block_18_ffn_g_pad_0, pad_type = block_18_ffn_g_pad_type_0, strides = block_18_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_88, x = block_18_ffn_rmsnorm)[name = string("block_18_ffn_g")]; tensor block_18_ffn_g_activation = silu(x = block_18_ffn_g)[name = string("block_18_ffn_g_activation")]; tensor block_18_ffn_x_gated = mul(x = 
block_18_ffn_inproj, y = block_18_ffn_g_activation)[name = string("block_18_ffn_x_gated")]; tensor block_18_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508610432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511879104))))[name = string("block_18_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_89 = constexpr_blockwise_shift_scale(data = block_18_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511907840))))[name = string("constexpr_blockwise_shift_scale_89")]; tensor block_18_ffn_outproj_strides_0 = const()[name = string("block_18_ffn_outproj_strides_0"), val = tensor([1])]; string block_18_ffn_outproj_pad_type_0 = const()[name = string("block_18_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_18_ffn_outproj_pad_0 = const()[name = string("block_18_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_18_ffn_outproj_dilations_0 = const()[name = string("block_18_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_18_ffn_outproj_groups_0 = const()[name = string("block_18_ffn_outproj_groups_0"), val = int32(1)]; tensor block_18_ffn_outproj = conv(dilations = block_18_ffn_outproj_dilations_0, groups = block_18_ffn_outproj_groups_0, pad = block_18_ffn_outproj_pad_0, pad_type = block_18_ffn_outproj_pad_type_0, strides = block_18_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_89, x = block_18_ffn_x_gated)[name = string("block_18_ffn_outproj")]; tensor block_18_residual_2 = add(x = block_18_ffn_outproj, y = block_18_residual_1)[name = string("block_18_residual_2")]; tensor block_19_attention_rmsnorm_abs = abs(x = block_18_residual_2)[name = string("block_19_attention_rmsnorm_abs")]; tensor block_19_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_19_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_19_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_19_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_19_attention_rmsnorm_maxval = reduce_max(axes = block_19_attention_rmsnorm_maxval_axes_0, keep_dims = block_19_attention_rmsnorm_maxval_keep_dims_0, x = block_19_attention_rmsnorm_abs)[name = string("block_19_attention_rmsnorm_maxval")]; fp16 block_19_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_19_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_19_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_19_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_19_attention_rmsnorm_maxval_clipped = clip(alpha = block_19_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_19_attention_rmsnorm_maxval_clipped_beta_0, x = block_19_attention_rmsnorm_maxval)[name = string("block_19_attention_rmsnorm_maxval_clipped")]; tensor block_19_attention_rmsnorm_scaled = real_div(x = block_18_residual_2, y = block_19_attention_rmsnorm_maxval_clipped)[name = string("block_19_attention_rmsnorm_scaled")]; tensor block_19_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_19_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_19_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_19_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_19_attention_rmsnorm_squared_sum = reduce_sum_square(axes 
= block_19_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_19_attention_rmsnorm_squared_sum_keep_dims_0, x = block_19_attention_rmsnorm_scaled)[name = string("block_19_attention_rmsnorm_squared_sum")]; fp16 block_19_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_19_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_19_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_19_attention_rmsnorm_rsqrt_epsilon_0, x = block_19_attention_rmsnorm_squared_sum)[name = string("block_19_attention_rmsnorm_rsqrt")]; fp16 block_19_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_19_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_19_attention_rmsnorm_dim_scaled = mul(x = block_19_attention_rmsnorm_scaled, y = block_19_attention_rmsnorm_dim_scaled_y_0)[name = string("block_19_attention_rmsnorm_dim_scaled")]; tensor block_19_attention_rmsnorm_normalized = mul(x = block_19_attention_rmsnorm_dim_scaled, y = block_19_attention_rmsnorm_rsqrt)[name = string("block_19_attention_rmsnorm_normalized")]; tensor block_19_attention_rmsnorm_y_0 = const()[name = string("block_19_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511909696)))]; tensor block_19_attention_rmsnorm = mul(x = block_19_attention_rmsnorm_normalized, y = block_19_attention_rmsnorm_y_0)[name = string("block_19_attention_rmsnorm")]; tensor attention_19_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511911552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512685760))))[name = string("attention_19_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_90 = constexpr_blockwise_shift_scale(data = attention_19_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512722688))))[name = string("constexpr_blockwise_shift_scale_90")]; tensor attention_19_qkvproj_bias_0 = const()[name = string("attention_19_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512725056)))]; tensor attention_19_qkvproj_strides_0 = const()[name = string("attention_19_qkvproj_strides_0"), val = tensor([1])]; string attention_19_qkvproj_pad_type_0 = const()[name = string("attention_19_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_19_qkvproj_pad_0 = const()[name = string("attention_19_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_19_qkvproj_dilations_0 = const()[name = string("attention_19_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_19_qkvproj_groups_0 = const()[name = string("attention_19_qkvproj_groups_0"), val = int32(1)]; tensor attention_19_qkvproj = conv(bias = attention_19_qkvproj_bias_0, dilations = attention_19_qkvproj_dilations_0, groups = attention_19_qkvproj_groups_0, pad = attention_19_qkvproj_pad_0, pad_type = attention_19_qkvproj_pad_type_0, strides = attention_19_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_90, x = block_19_attention_rmsnorm)[name = string("attention_19_qkvproj")]; tensor attention_19_head_reshape_shape_0 = const()[name = string("attention_19_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_19_head_reshape = reshape(shape = attention_19_head_reshape_shape_0, x = attention_19_qkvproj)[name = string("attention_19_head_reshape")]; tensor 
attention_19_head_transpose_perm_0 = const()[name = string("attention_19_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_19_split_qkv_heads_axis_0 = const()[name = string("attention_19_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_19_split_qkv_heads_split_sizes_0 = const()[name = string("attention_19_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_19_head_transpose = transpose(perm = attention_19_head_transpose_perm_0, x = attention_19_head_reshape)[name = string("transpose_10")]; tensor attention_19_split_qkv_heads_0, tensor attention_19_split_qkv_heads_1, tensor attention_19_split_qkv_heads_2 = split(axis = attention_19_split_qkv_heads_axis_0, split_sizes = attention_19_split_qkv_heads_split_sizes_0, x = attention_19_head_transpose)[name = string("attention_19_split_qkv_heads")]; tensor attention_19_q_rope_lhs_mult = mul(x = attention_19_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_19_q_rope_lhs_mult")]; int32 attention_19_q_rotate_half_split_num_splits_0 = const()[name = string("attention_19_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_19_q_rotate_half_split_axis_0 = const()[name = string("attention_19_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_19_q_rotate_half_split_0, tensor attention_19_q_rotate_half_split_1 = split(axis = attention_19_q_rotate_half_split_axis_0, num_splits = attention_19_q_rotate_half_split_num_splits_0, x = attention_19_split_qkv_heads_0)[name = string("attention_19_q_rotate_half_split")]; fp16 attention_19_q_rotate_half_neg_y_0 = const()[name = string("attention_19_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_19_q_rotate_half_neg = mul(x = attention_19_q_rotate_half_split_1, y = attention_19_q_rotate_half_neg_y_0)[name = string("attention_19_q_rotate_half_neg")]; int32 attention_19_q_rotate_half_concat_axis_0 = const()[name = string("attention_19_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_19_q_rotate_half_concat_interleave_0 = const()[name = string("attention_19_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_19_q_rotate_half_concat = concat(axis = attention_19_q_rotate_half_concat_axis_0, interleave = attention_19_q_rotate_half_concat_interleave_0, values = (attention_19_q_rotate_half_neg, attention_19_q_rotate_half_split_0))[name = string("attention_19_q_rotate_half_concat")]; tensor attention_19_q_rope_rhs_mult = mul(x = attention_19_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_19_q_rope_rhs_mult")]; tensor attention_19_q_rope = add(x = attention_19_q_rope_lhs_mult, y = attention_19_q_rope_rhs_mult)[name = string("attention_19_q_rope")]; tensor attention_19_k_rope_lhs_mult = mul(x = attention_19_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_19_k_rope_lhs_mult")]; int32 attention_19_k_rotate_half_split_num_splits_0 = const()[name = string("attention_19_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_19_k_rotate_half_split_axis_0 = const()[name = string("attention_19_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_19_k_rotate_half_split_0, tensor attention_19_k_rotate_half_split_1 = split(axis = attention_19_k_rotate_half_split_axis_0, num_splits = attention_19_k_rotate_half_split_num_splits_0, x = attention_19_split_qkv_heads_1)[name = string("attention_19_k_rotate_half_split")]; fp16 attention_19_k_rotate_half_neg_y_0 = const()[name = string("attention_19_k_rotate_half_neg_y_0"), val = 
fp16(-0x1p+0)]; tensor attention_19_k_rotate_half_neg = mul(x = attention_19_k_rotate_half_split_1, y = attention_19_k_rotate_half_neg_y_0)[name = string("attention_19_k_rotate_half_neg")]; int32 attention_19_k_rotate_half_concat_axis_0 = const()[name = string("attention_19_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_19_k_rotate_half_concat_interleave_0 = const()[name = string("attention_19_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_19_k_rotate_half_concat = concat(axis = attention_19_k_rotate_half_concat_axis_0, interleave = attention_19_k_rotate_half_concat_interleave_0, values = (attention_19_k_rotate_half_neg, attention_19_k_rotate_half_split_0))[name = string("attention_19_k_rotate_half_concat")]; tensor attention_19_k_rope_rhs_mult = mul(x = attention_19_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_19_k_rope_rhs_mult")]; tensor attention_19_k_rope = add(x = attention_19_k_rope_lhs_mult, y = attention_19_k_rope_rhs_mult)[name = string("attention_19_k_rope")]; int32 attention_19_q_splits_axis_0 = const()[name = string("attention_19_q_splits_axis_0"), val = int32(1)]; int32 attention_19_q_splits_num_splits_0 = const()[name = string("attention_19_q_splits_num_splits_0"), val = int32(2)]; tensor attention_19_q_splits_0, tensor attention_19_q_splits_1 = split(axis = attention_19_q_splits_axis_0, num_splits = attention_19_q_splits_num_splits_0, x = attention_19_q_rope)[name = string("attention_19_q_splits")]; tensor attention_19_update_begin_0_values0_0 = const()[name = string("attention_19_update_begin_0_values0_0"), val = tensor([19])]; tensor attention_19_update_begin_0_values1_0 = const()[name = string("attention_19_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_19_update_begin_0_values3_0 = const()[name = string("attention_19_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_19_update_begin_0_axis_0 = const()[name = string("attention_19_update_begin_0_axis_0"), val = int32(0)]; bool attention_19_update_begin_0_interleave_0 = const()[name = string("attention_19_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_19_update_begin_0 = concat(axis = attention_19_update_begin_0_axis_0, interleave = attention_19_update_begin_0_interleave_0, values = (attention_19_update_begin_0_values0_0, attention_19_update_begin_0_values1_0, query_pos1, attention_19_update_begin_0_values3_0))[name = string("attention_19_update_begin_0")]; tensor attention_19_update_end_0_values0_0 = const()[name = string("attention_19_update_end_0_values0_0"), val = tensor([20])]; tensor attention_19_update_end_0_values1_0 = const()[name = string("attention_19_update_end_0_values1_0"), val = tensor([2])]; tensor attention_19_update_end_0_values3_0 = const()[name = string("attention_19_update_end_0_values3_0"), val = tensor([64])]; int32 attention_19_update_end_0_axis_0 = const()[name = string("attention_19_update_end_0_axis_0"), val = int32(0)]; bool attention_19_update_end_0_interleave_0 = const()[name = string("attention_19_update_end_0_interleave_0"), val = bool(false)]; tensor attention_19_update_end_0 = concat(axis = attention_19_update_end_0_axis_0, interleave = attention_19_update_end_0_interleave_0, values = (attention_19_update_end_0_values0_0, attention_19_update_end_0_values1_0, end_pos_0, attention_19_update_end_0_values3_0))[name = string("attention_19_update_end_0")]; tensor attention_19_updated_key_cache_0_squeeze_mask_0 = const()[name = 
string("attention_19_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_19_updated_key_cache_0 = slice_update(begin = attention_19_update_begin_0, end = attention_19_update_end_0, squeeze_mask = attention_19_updated_key_cache_0_squeeze_mask_0, update = attention_19_k_rope, x = coreml_update_state_36)[name = string("attention_19_updated_key_cache_0")]; write_state(data = attention_19_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_38_write_state")]; tensor coreml_update_state_38 = read_state(input = key_cache_state)[name = string("coreml_update_state_38")]; tensor attention_19_key_cache_begin_0 = const()[name = string("attention_19_key_cache_begin_0"), val = tensor([19, 0, 0, 0])]; tensor attention_19_key_cache_end_0 = const()[name = string("attention_19_key_cache_end_0"), val = tensor([20, 2, 512, 64])]; tensor attention_19_key_cache_squeeze_mask_0 = const()[name = string("attention_19_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_19_key_cache = slice_by_index(begin = attention_19_key_cache_begin_0, end = attention_19_key_cache_end_0, squeeze_mask = attention_19_key_cache_squeeze_mask_0, x = coreml_update_state_38)[name = string("attention_19_key_cache")]; int32 attention_19_key_cache_head_axis_0 = const()[name = string("attention_19_key_cache_head_axis_0"), val = int32(1)]; int32 attention_19_key_cache_head_num_splits_0 = const()[name = string("attention_19_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_19_key_cache_head_0, tensor attention_19_key_cache_head_1 = split(axis = attention_19_key_cache_head_axis_0, num_splits = attention_19_key_cache_head_num_splits_0, x = attention_19_key_cache)[name = string("attention_19_key_cache_head")]; tensor attention_19_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_19_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_19_updated_value_cache_0 = slice_update(begin = attention_19_update_begin_0, end = attention_19_update_end_0, squeeze_mask = attention_19_updated_value_cache_0_squeeze_mask_0, update = attention_19_split_qkv_heads_2, x = coreml_update_state_37)[name = string("attention_19_updated_value_cache_0")]; write_state(data = attention_19_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_39_write_state")]; tensor coreml_update_state_39 = read_state(input = value_cache_state)[name = string("coreml_update_state_39")]; tensor attention_19_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_19_slice_current_layer_value_cache_begin_0"), val = tensor([19, 0, 0, 0])]; tensor attention_19_slice_current_layer_value_cache_end_0 = const()[name = string("attention_19_slice_current_layer_value_cache_end_0"), val = tensor([20, 2, 512, 64])]; tensor attention_19_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_19_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_19_slice_current_layer_value_cache = slice_by_index(begin = attention_19_slice_current_layer_value_cache_begin_0, end = attention_19_slice_current_layer_value_cache_end_0, squeeze_mask = attention_19_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_39)[name = string("attention_19_slice_current_layer_value_cache")]; int32 attention_19_slice_value_cache_heads_axis_0 = const()[name = 
string("attention_19_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_19_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_19_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_19_slice_value_cache_heads_0, tensor attention_19_slice_value_cache_heads_1 = split(axis = attention_19_slice_value_cache_heads_axis_0, num_splits = attention_19_slice_value_cache_heads_num_splits_0, x = attention_19_slice_current_layer_value_cache)[name = string("attention_19_slice_value_cache_heads")]; bool attention_19_scores_0_transpose_y_0 = const()[name = string("attention_19_scores_0_transpose_y_0"), val = bool(true)]; bool attention_19_scores_0_transpose_x_0 = const()[name = string("attention_19_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_19_scores_0 = matmul(transpose_x = attention_19_scores_0_transpose_x_0, transpose_y = attention_19_scores_0_transpose_y_0, x = attention_19_key_cache_head_0, y = attention_19_q_splits_0)[name = string("attention_19_scores_0")]; fp16 attention_19_scaled_scores_0_y_0 = const()[name = string("attention_19_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_19_scaled_scores_0 = mul(x = attention_19_scores_0, y = attention_19_scaled_scores_0_y_0)[name = string("attention_19_scaled_scores_0")]; tensor attention_19_masked_scaled_scores_0 = add(x = attention_19_scaled_scores_0, y = transpose_0)[name = string("attention_19_masked_scaled_scores_0")]; int32 softmax_38_axis_0 = const()[name = string("softmax_38_axis_0"), val = int32(-2)]; tensor softmax_38 = softmax(axis = softmax_38_axis_0, x = attention_19_masked_scaled_scores_0)[name = string("softmax_38")]; bool attention_19_attention_0_transpose_x_0 = const()[name = string("attention_19_attention_0_transpose_x_0"), val = bool(true)]; bool attention_19_attention_0_transpose_y_0 = const()[name = string("attention_19_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_19_attention_0 = matmul(transpose_x = attention_19_attention_0_transpose_x_0, transpose_y = attention_19_attention_0_transpose_y_0, x = softmax_38, y = attention_19_slice_value_cache_heads_0)[name = string("attention_19_attention_0")]; bool attention_19_scores_1_transpose_y_0 = const()[name = string("attention_19_scores_1_transpose_y_0"), val = bool(true)]; bool attention_19_scores_1_transpose_x_0 = const()[name = string("attention_19_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_19_scores_1 = matmul(transpose_x = attention_19_scores_1_transpose_x_0, transpose_y = attention_19_scores_1_transpose_y_0, x = attention_19_key_cache_head_1, y = attention_19_q_splits_1)[name = string("attention_19_scores_1")]; fp16 attention_19_scaled_scores_1_y_0 = const()[name = string("attention_19_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_19_scaled_scores_1 = mul(x = attention_19_scores_1, y = attention_19_scaled_scores_1_y_0)[name = string("attention_19_scaled_scores_1")]; tensor attention_19_masked_scaled_scores_1 = add(x = attention_19_scaled_scores_1, y = transpose_0)[name = string("attention_19_masked_scaled_scores_1")]; int32 softmax_39_axis_0 = const()[name = string("softmax_39_axis_0"), val = int32(-2)]; tensor softmax_39 = softmax(axis = softmax_39_axis_0, x = attention_19_masked_scaled_scores_1)[name = string("softmax_39")]; bool attention_19_attention_1_transpose_x_0 = const()[name = string("attention_19_attention_1_transpose_x_0"), val = bool(true)]; bool attention_19_attention_1_transpose_y_0 = const()[name = 
string("attention_19_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_19_attention_1 = matmul(transpose_x = attention_19_attention_1_transpose_x_0, transpose_y = attention_19_attention_1_transpose_y_0, x = softmax_39, y = attention_19_slice_value_cache_heads_1)[name = string("attention_19_attention_1")]; int32 attention_19_concat_attention_all_heads_axis_0 = const()[name = string("attention_19_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_19_concat_attention_all_heads_interleave_0 = const()[name = string("attention_19_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_19_concat_attention_all_heads = concat(axis = attention_19_concat_attention_all_heads_axis_0, interleave = attention_19_concat_attention_all_heads_interleave_0, values = (attention_19_attention_0, attention_19_attention_1))[name = string("attention_19_concat_attention_all_heads")]; tensor attention_19_channels_first_retransposed_perm_0 = const()[name = string("attention_19_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_19_reshaped_shape_0 = const()[name = string("attention_19_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_19_channels_first_retransposed = transpose(perm = attention_19_channels_first_retransposed_perm_0, x = attention_19_concat_attention_all_heads)[name = string("transpose_9")]; tensor attention_19_reshaped = reshape(shape = attention_19_reshaped_shape_0, x = attention_19_channels_first_retransposed)[name = string("attention_19_reshaped")]; tensor attention_19_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512727424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513329600))))[name = string("attention_19_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_91 = constexpr_blockwise_shift_scale(data = attention_19_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513358336))))[name = string("constexpr_blockwise_shift_scale_91")]; tensor attention_19_outproj_strides_0 = const()[name = string("attention_19_outproj_strides_0"), val = tensor([1])]; string attention_19_outproj_pad_type_0 = const()[name = string("attention_19_outproj_pad_type_0"), val = string("valid")]; tensor attention_19_outproj_pad_0 = const()[name = string("attention_19_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_19_outproj_dilations_0 = const()[name = string("attention_19_outproj_dilations_0"), val = tensor([1])]; int32 attention_19_outproj_groups_0 = const()[name = string("attention_19_outproj_groups_0"), val = int32(1)]; tensor attention_19_outproj = conv(dilations = attention_19_outproj_dilations_0, groups = attention_19_outproj_groups_0, pad = attention_19_outproj_pad_0, pad_type = attention_19_outproj_pad_type_0, strides = attention_19_outproj_strides_0, weight = constexpr_blockwise_shift_scale_91, x = attention_19_reshaped)[name = string("attention_19_outproj")]; tensor block_19_residual_1 = add(x = block_18_residual_2, y = attention_19_outproj)[name = string("block_19_residual_1")]; tensor block_19_ffn_rmsnorm_abs = abs(x = block_19_residual_1)[name = string("block_19_ffn_rmsnorm_abs")]; tensor block_19_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_19_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_19_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = 
string("block_19_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_19_ffn_rmsnorm_maxval = reduce_max(axes = block_19_ffn_rmsnorm_maxval_axes_0, keep_dims = block_19_ffn_rmsnorm_maxval_keep_dims_0, x = block_19_ffn_rmsnorm_abs)[name = string("block_19_ffn_rmsnorm_maxval")]; fp16 block_19_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_19_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_19_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_19_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_19_ffn_rmsnorm_maxval_clipped = clip(alpha = block_19_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_19_ffn_rmsnorm_maxval_clipped_beta_0, x = block_19_ffn_rmsnorm_maxval)[name = string("block_19_ffn_rmsnorm_maxval_clipped")]; tensor block_19_ffn_rmsnorm_scaled = real_div(x = block_19_residual_1, y = block_19_ffn_rmsnorm_maxval_clipped)[name = string("block_19_ffn_rmsnorm_scaled")]; tensor block_19_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_19_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_19_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_19_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_19_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_19_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_19_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_19_ffn_rmsnorm_scaled)[name = string("block_19_ffn_rmsnorm_squared_sum")]; fp16 block_19_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_19_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_19_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_19_ffn_rmsnorm_rsqrt_epsilon_0, x = block_19_ffn_rmsnorm_squared_sum)[name = string("block_19_ffn_rmsnorm_rsqrt")]; fp16 block_19_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_19_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_19_ffn_rmsnorm_dim_scaled = mul(x = block_19_ffn_rmsnorm_scaled, y = block_19_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_19_ffn_rmsnorm_dim_scaled")]; tensor block_19_ffn_rmsnorm_normalized = mul(x = block_19_ffn_rmsnorm_dim_scaled, y = block_19_ffn_rmsnorm_rsqrt)[name = string("block_19_ffn_rmsnorm_normalized")]; tensor block_19_ffn_rmsnorm_y_0 = const()[name = string("block_19_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513360192)))]; tensor block_19_ffn_rmsnorm = mul(x = block_19_ffn_rmsnorm_normalized, y = block_19_ffn_rmsnorm_y_0)[name = string("block_19_ffn_rmsnorm")]; tensor block_19_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513362048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516630720))))[name = string("block_19_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_92 = constexpr_blockwise_shift_scale(data = block_19_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516786432))))[name = string("constexpr_blockwise_shift_scale_92")]; tensor block_19_ffn_inproj_strides_0 = const()[name = string("block_19_ffn_inproj_strides_0"), val = tensor([1])]; string block_19_ffn_inproj_pad_type_0 = const()[name = string("block_19_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_19_ffn_inproj_pad_0 = const()[name = string("block_19_ffn_inproj_pad_0"), val = 
tensor([0, 0])]; tensor block_19_ffn_inproj_dilations_0 = const()[name = string("block_19_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_19_ffn_inproj_groups_0 = const()[name = string("block_19_ffn_inproj_groups_0"), val = int32(1)]; tensor block_19_ffn_inproj = conv(dilations = block_19_ffn_inproj_dilations_0, groups = block_19_ffn_inproj_groups_0, pad = block_19_ffn_inproj_pad_0, pad_type = block_19_ffn_inproj_pad_type_0, strides = block_19_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_92, x = block_19_ffn_rmsnorm)[name = string("block_19_ffn_inproj")]; tensor block_19_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516796224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(520064896))))[name = string("block_19_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_93 = constexpr_blockwise_shift_scale(data = block_19_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(520220608))))[name = string("constexpr_blockwise_shift_scale_93")]; tensor block_19_ffn_g_strides_0 = const()[name = string("block_19_ffn_g_strides_0"), val = tensor([1])]; string block_19_ffn_g_pad_type_0 = const()[name = string("block_19_ffn_g_pad_type_0"), val = string("valid")]; tensor block_19_ffn_g_pad_0 = const()[name = string("block_19_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_19_ffn_g_dilations_0 = const()[name = string("block_19_ffn_g_dilations_0"), val = tensor([1])]; int32 block_19_ffn_g_groups_0 = const()[name = string("block_19_ffn_g_groups_0"), val = int32(1)]; tensor block_19_ffn_g = conv(dilations = block_19_ffn_g_dilations_0, groups = block_19_ffn_g_groups_0, pad = block_19_ffn_g_pad_0, pad_type = block_19_ffn_g_pad_type_0, strides = block_19_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_93, x = block_19_ffn_rmsnorm)[name = string("block_19_ffn_g")]; tensor block_19_ffn_g_activation = silu(x = block_19_ffn_g)[name = string("block_19_ffn_g_activation")]; tensor block_19_ffn_x_gated = mul(x = block_19_ffn_inproj, y = block_19_ffn_g_activation)[name = string("block_19_ffn_x_gated")]; tensor block_19_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(520230400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523499072))))[name = string("block_19_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_94 = constexpr_blockwise_shift_scale(data = block_19_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523527808))))[name = string("constexpr_blockwise_shift_scale_94")]; tensor block_19_ffn_outproj_strides_0 = const()[name = string("block_19_ffn_outproj_strides_0"), val = tensor([1])]; string block_19_ffn_outproj_pad_type_0 = const()[name = string("block_19_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_19_ffn_outproj_pad_0 = const()[name = string("block_19_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_19_ffn_outproj_dilations_0 = const()[name = string("block_19_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_19_ffn_outproj_groups_0 = const()[name = string("block_19_ffn_outproj_groups_0"), val = int32(1)]; tensor block_19_ffn_outproj = conv(dilations = 
block_19_ffn_outproj_dilations_0, groups = block_19_ffn_outproj_groups_0, pad = block_19_ffn_outproj_pad_0, pad_type = block_19_ffn_outproj_pad_type_0, strides = block_19_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_94, x = block_19_ffn_x_gated)[name = string("block_19_ffn_outproj")]; tensor block_19_residual_2 = add(x = block_19_ffn_outproj, y = block_19_residual_1)[name = string("block_19_residual_2")]; tensor block_20_attention_rmsnorm_abs = abs(x = block_19_residual_2)[name = string("block_20_attention_rmsnorm_abs")]; tensor block_20_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_20_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_20_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_20_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_20_attention_rmsnorm_maxval = reduce_max(axes = block_20_attention_rmsnorm_maxval_axes_0, keep_dims = block_20_attention_rmsnorm_maxval_keep_dims_0, x = block_20_attention_rmsnorm_abs)[name = string("block_20_attention_rmsnorm_maxval")]; fp16 block_20_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_20_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_20_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_20_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_20_attention_rmsnorm_maxval_clipped = clip(alpha = block_20_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_20_attention_rmsnorm_maxval_clipped_beta_0, x = block_20_attention_rmsnorm_maxval)[name = string("block_20_attention_rmsnorm_maxval_clipped")]; tensor block_20_attention_rmsnorm_scaled = real_div(x = block_19_residual_2, y = block_20_attention_rmsnorm_maxval_clipped)[name = string("block_20_attention_rmsnorm_scaled")]; tensor block_20_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_20_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_20_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_20_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_20_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_20_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_20_attention_rmsnorm_squared_sum_keep_dims_0, x = block_20_attention_rmsnorm_scaled)[name = string("block_20_attention_rmsnorm_squared_sum")]; fp16 block_20_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_20_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_20_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_20_attention_rmsnorm_rsqrt_epsilon_0, x = block_20_attention_rmsnorm_squared_sum)[name = string("block_20_attention_rmsnorm_rsqrt")]; fp16 block_20_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_20_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_20_attention_rmsnorm_dim_scaled = mul(x = block_20_attention_rmsnorm_scaled, y = block_20_attention_rmsnorm_dim_scaled_y_0)[name = string("block_20_attention_rmsnorm_dim_scaled")]; tensor block_20_attention_rmsnorm_normalized = mul(x = block_20_attention_rmsnorm_dim_scaled, y = block_20_attention_rmsnorm_rsqrt)[name = string("block_20_attention_rmsnorm_normalized")]; tensor block_20_attention_rmsnorm_y_0 = const()[name = string("block_20_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523529664)))]; tensor block_20_attention_rmsnorm = mul(x = 
block_20_attention_rmsnorm_normalized, y = block_20_attention_rmsnorm_y_0)[name = string("block_20_attention_rmsnorm")]; tensor attention_20_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523531520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524305728))))[name = string("attention_20_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_95 = constexpr_blockwise_shift_scale(data = attention_20_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524342656))))[name = string("constexpr_blockwise_shift_scale_95")]; tensor attention_20_qkvproj_bias_0 = const()[name = string("attention_20_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524345024)))]; tensor attention_20_qkvproj_strides_0 = const()[name = string("attention_20_qkvproj_strides_0"), val = tensor([1])]; string attention_20_qkvproj_pad_type_0 = const()[name = string("attention_20_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_20_qkvproj_pad_0 = const()[name = string("attention_20_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_20_qkvproj_dilations_0 = const()[name = string("attention_20_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_20_qkvproj_groups_0 = const()[name = string("attention_20_qkvproj_groups_0"), val = int32(1)]; tensor attention_20_qkvproj = conv(bias = attention_20_qkvproj_bias_0, dilations = attention_20_qkvproj_dilations_0, groups = attention_20_qkvproj_groups_0, pad = attention_20_qkvproj_pad_0, pad_type = attention_20_qkvproj_pad_type_0, strides = attention_20_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_95, x = block_20_attention_rmsnorm)[name = string("attention_20_qkvproj")]; tensor attention_20_head_reshape_shape_0 = const()[name = string("attention_20_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_20_head_reshape = reshape(shape = attention_20_head_reshape_shape_0, x = attention_20_qkvproj)[name = string("attention_20_head_reshape")]; tensor attention_20_head_transpose_perm_0 = const()[name = string("attention_20_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_20_split_qkv_heads_axis_0 = const()[name = string("attention_20_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_20_split_qkv_heads_split_sizes_0 = const()[name = string("attention_20_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_20_head_transpose = transpose(perm = attention_20_head_transpose_perm_0, x = attention_20_head_reshape)[name = string("transpose_8")]; tensor attention_20_split_qkv_heads_0, tensor attention_20_split_qkv_heads_1, tensor attention_20_split_qkv_heads_2 = split(axis = attention_20_split_qkv_heads_axis_0, split_sizes = attention_20_split_qkv_heads_split_sizes_0, x = attention_20_head_transpose)[name = string("attention_20_split_qkv_heads")]; tensor attention_20_q_rope_lhs_mult = mul(x = attention_20_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_20_q_rope_lhs_mult")]; int32 attention_20_q_rotate_half_split_num_splits_0 = const()[name = string("attention_20_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_20_q_rotate_half_split_axis_0 = const()[name = string("attention_20_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_20_q_rotate_half_split_0, tensor 
attention_20_q_rotate_half_split_1 = split(axis = attention_20_q_rotate_half_split_axis_0, num_splits = attention_20_q_rotate_half_split_num_splits_0, x = attention_20_split_qkv_heads_0)[name = string("attention_20_q_rotate_half_split")]; fp16 attention_20_q_rotate_half_neg_y_0 = const()[name = string("attention_20_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_20_q_rotate_half_neg = mul(x = attention_20_q_rotate_half_split_1, y = attention_20_q_rotate_half_neg_y_0)[name = string("attention_20_q_rotate_half_neg")]; int32 attention_20_q_rotate_half_concat_axis_0 = const()[name = string("attention_20_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_20_q_rotate_half_concat_interleave_0 = const()[name = string("attention_20_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_20_q_rotate_half_concat = concat(axis = attention_20_q_rotate_half_concat_axis_0, interleave = attention_20_q_rotate_half_concat_interleave_0, values = (attention_20_q_rotate_half_neg, attention_20_q_rotate_half_split_0))[name = string("attention_20_q_rotate_half_concat")]; tensor attention_20_q_rope_rhs_mult = mul(x = attention_20_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_20_q_rope_rhs_mult")]; tensor attention_20_q_rope = add(x = attention_20_q_rope_lhs_mult, y = attention_20_q_rope_rhs_mult)[name = string("attention_20_q_rope")]; tensor attention_20_k_rope_lhs_mult = mul(x = attention_20_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_20_k_rope_lhs_mult")]; int32 attention_20_k_rotate_half_split_num_splits_0 = const()[name = string("attention_20_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_20_k_rotate_half_split_axis_0 = const()[name = string("attention_20_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_20_k_rotate_half_split_0, tensor attention_20_k_rotate_half_split_1 = split(axis = attention_20_k_rotate_half_split_axis_0, num_splits = attention_20_k_rotate_half_split_num_splits_0, x = attention_20_split_qkv_heads_1)[name = string("attention_20_k_rotate_half_split")]; fp16 attention_20_k_rotate_half_neg_y_0 = const()[name = string("attention_20_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_20_k_rotate_half_neg = mul(x = attention_20_k_rotate_half_split_1, y = attention_20_k_rotate_half_neg_y_0)[name = string("attention_20_k_rotate_half_neg")]; int32 attention_20_k_rotate_half_concat_axis_0 = const()[name = string("attention_20_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_20_k_rotate_half_concat_interleave_0 = const()[name = string("attention_20_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_20_k_rotate_half_concat = concat(axis = attention_20_k_rotate_half_concat_axis_0, interleave = attention_20_k_rotate_half_concat_interleave_0, values = (attention_20_k_rotate_half_neg, attention_20_k_rotate_half_split_0))[name = string("attention_20_k_rotate_half_concat")]; tensor attention_20_k_rope_rhs_mult = mul(x = attention_20_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_20_k_rope_rhs_mult")]; tensor attention_20_k_rope = add(x = attention_20_k_rope_lhs_mult, y = attention_20_k_rope_rhs_mult)[name = string("attention_20_k_rope")]; int32 attention_20_q_splits_axis_0 = const()[name = string("attention_20_q_splits_axis_0"), val = int32(1)]; int32 attention_20_q_splits_num_splits_0 = const()[name = string("attention_20_q_splits_num_splits_0"), val = int32(2)]; tensor attention_20_q_splits_0, tensor 
attention_20_q_splits_1 = split(axis = attention_20_q_splits_axis_0, num_splits = attention_20_q_splits_num_splits_0, x = attention_20_q_rope)[name = string("attention_20_q_splits")]; tensor attention_20_update_begin_0_values0_0 = const()[name = string("attention_20_update_begin_0_values0_0"), val = tensor([20])]; tensor attention_20_update_begin_0_values1_0 = const()[name = string("attention_20_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_20_update_begin_0_values3_0 = const()[name = string("attention_20_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_20_update_begin_0_axis_0 = const()[name = string("attention_20_update_begin_0_axis_0"), val = int32(0)]; bool attention_20_update_begin_0_interleave_0 = const()[name = string("attention_20_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_20_update_begin_0 = concat(axis = attention_20_update_begin_0_axis_0, interleave = attention_20_update_begin_0_interleave_0, values = (attention_20_update_begin_0_values0_0, attention_20_update_begin_0_values1_0, query_pos1, attention_20_update_begin_0_values3_0))[name = string("attention_20_update_begin_0")]; tensor attention_20_update_end_0_values0_0 = const()[name = string("attention_20_update_end_0_values0_0"), val = tensor([21])]; tensor attention_20_update_end_0_values1_0 = const()[name = string("attention_20_update_end_0_values1_0"), val = tensor([2])]; tensor attention_20_update_end_0_values3_0 = const()[name = string("attention_20_update_end_0_values3_0"), val = tensor([64])]; int32 attention_20_update_end_0_axis_0 = const()[name = string("attention_20_update_end_0_axis_0"), val = int32(0)]; bool attention_20_update_end_0_interleave_0 = const()[name = string("attention_20_update_end_0_interleave_0"), val = bool(false)]; tensor attention_20_update_end_0 = concat(axis = attention_20_update_end_0_axis_0, interleave = attention_20_update_end_0_interleave_0, values = (attention_20_update_end_0_values0_0, attention_20_update_end_0_values1_0, end_pos_0, attention_20_update_end_0_values3_0))[name = string("attention_20_update_end_0")]; tensor attention_20_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_20_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_20_updated_key_cache_0 = slice_update(begin = attention_20_update_begin_0, end = attention_20_update_end_0, squeeze_mask = attention_20_updated_key_cache_0_squeeze_mask_0, update = attention_20_k_rope, x = coreml_update_state_38)[name = string("attention_20_updated_key_cache_0")]; write_state(data = attention_20_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_40_write_state")]; tensor coreml_update_state_40 = read_state(input = key_cache_state)[name = string("coreml_update_state_40")]; tensor attention_20_key_cache_begin_0 = const()[name = string("attention_20_key_cache_begin_0"), val = tensor([20, 0, 0, 0])]; tensor attention_20_key_cache_end_0 = const()[name = string("attention_20_key_cache_end_0"), val = tensor([21, 2, 512, 64])]; tensor attention_20_key_cache_squeeze_mask_0 = const()[name = string("attention_20_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_20_key_cache = slice_by_index(begin = attention_20_key_cache_begin_0, end = attention_20_key_cache_end_0, squeeze_mask = attention_20_key_cache_squeeze_mask_0, x = coreml_update_state_40)[name = string("attention_20_key_cache")]; int32 attention_20_key_cache_head_axis_0 = const()[name = 
string("attention_20_key_cache_head_axis_0"), val = int32(1)]; int32 attention_20_key_cache_head_num_splits_0 = const()[name = string("attention_20_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_20_key_cache_head_0, tensor attention_20_key_cache_head_1 = split(axis = attention_20_key_cache_head_axis_0, num_splits = attention_20_key_cache_head_num_splits_0, x = attention_20_key_cache)[name = string("attention_20_key_cache_head")]; tensor attention_20_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_20_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_20_updated_value_cache_0 = slice_update(begin = attention_20_update_begin_0, end = attention_20_update_end_0, squeeze_mask = attention_20_updated_value_cache_0_squeeze_mask_0, update = attention_20_split_qkv_heads_2, x = coreml_update_state_39)[name = string("attention_20_updated_value_cache_0")]; write_state(data = attention_20_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_41_write_state")]; tensor coreml_update_state_41 = read_state(input = value_cache_state)[name = string("coreml_update_state_41")]; tensor attention_20_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_20_slice_current_layer_value_cache_begin_0"), val = tensor([20, 0, 0, 0])]; tensor attention_20_slice_current_layer_value_cache_end_0 = const()[name = string("attention_20_slice_current_layer_value_cache_end_0"), val = tensor([21, 2, 512, 64])]; tensor attention_20_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_20_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_20_slice_current_layer_value_cache = slice_by_index(begin = attention_20_slice_current_layer_value_cache_begin_0, end = attention_20_slice_current_layer_value_cache_end_0, squeeze_mask = attention_20_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_41)[name = string("attention_20_slice_current_layer_value_cache")]; int32 attention_20_slice_value_cache_heads_axis_0 = const()[name = string("attention_20_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_20_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_20_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_20_slice_value_cache_heads_0, tensor attention_20_slice_value_cache_heads_1 = split(axis = attention_20_slice_value_cache_heads_axis_0, num_splits = attention_20_slice_value_cache_heads_num_splits_0, x = attention_20_slice_current_layer_value_cache)[name = string("attention_20_slice_value_cache_heads")]; bool attention_20_scores_0_transpose_y_0 = const()[name = string("attention_20_scores_0_transpose_y_0"), val = bool(true)]; bool attention_20_scores_0_transpose_x_0 = const()[name = string("attention_20_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_20_scores_0 = matmul(transpose_x = attention_20_scores_0_transpose_x_0, transpose_y = attention_20_scores_0_transpose_y_0, x = attention_20_key_cache_head_0, y = attention_20_q_splits_0)[name = string("attention_20_scores_0")]; fp16 attention_20_scaled_scores_0_y_0 = const()[name = string("attention_20_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_20_scaled_scores_0 = mul(x = attention_20_scores_0, y = attention_20_scaled_scores_0_y_0)[name = string("attention_20_scaled_scores_0")]; tensor attention_20_masked_scaled_scores_0 = add(x = 
attention_20_scaled_scores_0, y = transpose_0)[name = string("attention_20_masked_scaled_scores_0")]; int32 softmax_40_axis_0 = const()[name = string("softmax_40_axis_0"), val = int32(-2)]; tensor softmax_40 = softmax(axis = softmax_40_axis_0, x = attention_20_masked_scaled_scores_0)[name = string("softmax_40")]; bool attention_20_attention_0_transpose_x_0 = const()[name = string("attention_20_attention_0_transpose_x_0"), val = bool(true)]; bool attention_20_attention_0_transpose_y_0 = const()[name = string("attention_20_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_20_attention_0 = matmul(transpose_x = attention_20_attention_0_transpose_x_0, transpose_y = attention_20_attention_0_transpose_y_0, x = softmax_40, y = attention_20_slice_value_cache_heads_0)[name = string("attention_20_attention_0")]; bool attention_20_scores_1_transpose_y_0 = const()[name = string("attention_20_scores_1_transpose_y_0"), val = bool(true)]; bool attention_20_scores_1_transpose_x_0 = const()[name = string("attention_20_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_20_scores_1 = matmul(transpose_x = attention_20_scores_1_transpose_x_0, transpose_y = attention_20_scores_1_transpose_y_0, x = attention_20_key_cache_head_1, y = attention_20_q_splits_1)[name = string("attention_20_scores_1")]; fp16 attention_20_scaled_scores_1_y_0 = const()[name = string("attention_20_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_20_scaled_scores_1 = mul(x = attention_20_scores_1, y = attention_20_scaled_scores_1_y_0)[name = string("attention_20_scaled_scores_1")]; tensor attention_20_masked_scaled_scores_1 = add(x = attention_20_scaled_scores_1, y = transpose_0)[name = string("attention_20_masked_scaled_scores_1")]; int32 softmax_41_axis_0 = const()[name = string("softmax_41_axis_0"), val = int32(-2)]; tensor softmax_41 = softmax(axis = softmax_41_axis_0, x = attention_20_masked_scaled_scores_1)[name = string("softmax_41")]; bool attention_20_attention_1_transpose_x_0 = const()[name = string("attention_20_attention_1_transpose_x_0"), val = bool(true)]; bool attention_20_attention_1_transpose_y_0 = const()[name = string("attention_20_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_20_attention_1 = matmul(transpose_x = attention_20_attention_1_transpose_x_0, transpose_y = attention_20_attention_1_transpose_y_0, x = softmax_41, y = attention_20_slice_value_cache_heads_1)[name = string("attention_20_attention_1")]; int32 attention_20_concat_attention_all_heads_axis_0 = const()[name = string("attention_20_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_20_concat_attention_all_heads_interleave_0 = const()[name = string("attention_20_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_20_concat_attention_all_heads = concat(axis = attention_20_concat_attention_all_heads_axis_0, interleave = attention_20_concat_attention_all_heads_interleave_0, values = (attention_20_attention_0, attention_20_attention_1))[name = string("attention_20_concat_attention_all_heads")]; tensor attention_20_channels_first_retransposed_perm_0 = const()[name = string("attention_20_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_20_reshaped_shape_0 = const()[name = string("attention_20_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_20_channels_first_retransposed = transpose(perm = attention_20_channels_first_retransposed_perm_0, x = attention_20_concat_attention_all_heads)[name = 
string("transpose_7")]; tensor attention_20_reshaped = reshape(shape = attention_20_reshaped_shape_0, x = attention_20_channels_first_retransposed)[name = string("attention_20_reshaped")]; tensor attention_20_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524347392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524949568))))[name = string("attention_20_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_96 = constexpr_blockwise_shift_scale(data = attention_20_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524978304))))[name = string("constexpr_blockwise_shift_scale_96")]; tensor attention_20_outproj_strides_0 = const()[name = string("attention_20_outproj_strides_0"), val = tensor([1])]; string attention_20_outproj_pad_type_0 = const()[name = string("attention_20_outproj_pad_type_0"), val = string("valid")]; tensor attention_20_outproj_pad_0 = const()[name = string("attention_20_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_20_outproj_dilations_0 = const()[name = string("attention_20_outproj_dilations_0"), val = tensor([1])]; int32 attention_20_outproj_groups_0 = const()[name = string("attention_20_outproj_groups_0"), val = int32(1)]; tensor attention_20_outproj = conv(dilations = attention_20_outproj_dilations_0, groups = attention_20_outproj_groups_0, pad = attention_20_outproj_pad_0, pad_type = attention_20_outproj_pad_type_0, strides = attention_20_outproj_strides_0, weight = constexpr_blockwise_shift_scale_96, x = attention_20_reshaped)[name = string("attention_20_outproj")]; tensor block_20_residual_1 = add(x = block_19_residual_2, y = attention_20_outproj)[name = string("block_20_residual_1")]; tensor block_20_ffn_rmsnorm_abs = abs(x = block_20_residual_1)[name = string("block_20_ffn_rmsnorm_abs")]; tensor block_20_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_20_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_20_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_20_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_20_ffn_rmsnorm_maxval = reduce_max(axes = block_20_ffn_rmsnorm_maxval_axes_0, keep_dims = block_20_ffn_rmsnorm_maxval_keep_dims_0, x = block_20_ffn_rmsnorm_abs)[name = string("block_20_ffn_rmsnorm_maxval")]; fp16 block_20_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_20_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_20_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_20_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_20_ffn_rmsnorm_maxval_clipped = clip(alpha = block_20_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_20_ffn_rmsnorm_maxval_clipped_beta_0, x = block_20_ffn_rmsnorm_maxval)[name = string("block_20_ffn_rmsnorm_maxval_clipped")]; tensor block_20_ffn_rmsnorm_scaled = real_div(x = block_20_residual_1, y = block_20_ffn_rmsnorm_maxval_clipped)[name = string("block_20_ffn_rmsnorm_scaled")]; tensor block_20_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_20_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_20_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_20_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_20_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_20_ffn_rmsnorm_squared_sum_axes_0, keep_dims = 
block_20_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_20_ffn_rmsnorm_scaled)[name = string("block_20_ffn_rmsnorm_squared_sum")]; fp16 block_20_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_20_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_20_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_20_ffn_rmsnorm_rsqrt_epsilon_0, x = block_20_ffn_rmsnorm_squared_sum)[name = string("block_20_ffn_rmsnorm_rsqrt")]; fp16 block_20_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_20_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_20_ffn_rmsnorm_dim_scaled = mul(x = block_20_ffn_rmsnorm_scaled, y = block_20_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_20_ffn_rmsnorm_dim_scaled")]; tensor block_20_ffn_rmsnorm_normalized = mul(x = block_20_ffn_rmsnorm_dim_scaled, y = block_20_ffn_rmsnorm_rsqrt)[name = string("block_20_ffn_rmsnorm_normalized")]; tensor block_20_ffn_rmsnorm_y_0 = const()[name = string("block_20_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524980160)))]; tensor block_20_ffn_rmsnorm = mul(x = block_20_ffn_rmsnorm_normalized, y = block_20_ffn_rmsnorm_y_0)[name = string("block_20_ffn_rmsnorm")]; tensor block_20_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524982016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528250688))))[name = string("block_20_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_97 = constexpr_blockwise_shift_scale(data = block_20_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528406400))))[name = string("constexpr_blockwise_shift_scale_97")]; tensor block_20_ffn_inproj_strides_0 = const()[name = string("block_20_ffn_inproj_strides_0"), val = tensor([1])]; string block_20_ffn_inproj_pad_type_0 = const()[name = string("block_20_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_20_ffn_inproj_pad_0 = const()[name = string("block_20_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_20_ffn_inproj_dilations_0 = const()[name = string("block_20_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_20_ffn_inproj_groups_0 = const()[name = string("block_20_ffn_inproj_groups_0"), val = int32(1)]; tensor block_20_ffn_inproj = conv(dilations = block_20_ffn_inproj_dilations_0, groups = block_20_ffn_inproj_groups_0, pad = block_20_ffn_inproj_pad_0, pad_type = block_20_ffn_inproj_pad_type_0, strides = block_20_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_97, x = block_20_ffn_rmsnorm)[name = string("block_20_ffn_inproj")]; tensor block_20_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528416192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531684864))))[name = string("block_20_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_98 = constexpr_blockwise_shift_scale(data = block_20_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531840576))))[name = string("constexpr_blockwise_shift_scale_98")]; tensor block_20_ffn_g_strides_0 = const()[name = string("block_20_ffn_g_strides_0"), val = tensor([1])]; string block_20_ffn_g_pad_type_0 = const()[name = 
string("block_20_ffn_g_pad_type_0"), val = string("valid")]; tensor block_20_ffn_g_pad_0 = const()[name = string("block_20_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_20_ffn_g_dilations_0 = const()[name = string("block_20_ffn_g_dilations_0"), val = tensor([1])]; int32 block_20_ffn_g_groups_0 = const()[name = string("block_20_ffn_g_groups_0"), val = int32(1)]; tensor block_20_ffn_g = conv(dilations = block_20_ffn_g_dilations_0, groups = block_20_ffn_g_groups_0, pad = block_20_ffn_g_pad_0, pad_type = block_20_ffn_g_pad_type_0, strides = block_20_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_98, x = block_20_ffn_rmsnorm)[name = string("block_20_ffn_g")]; tensor block_20_ffn_g_activation = silu(x = block_20_ffn_g)[name = string("block_20_ffn_g_activation")]; tensor block_20_ffn_x_gated = mul(x = block_20_ffn_inproj, y = block_20_ffn_g_activation)[name = string("block_20_ffn_x_gated")]; tensor block_20_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531850368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535119040))))[name = string("block_20_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_99 = constexpr_blockwise_shift_scale(data = block_20_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535147776))))[name = string("constexpr_blockwise_shift_scale_99")]; tensor block_20_ffn_outproj_strides_0 = const()[name = string("block_20_ffn_outproj_strides_0"), val = tensor([1])]; string block_20_ffn_outproj_pad_type_0 = const()[name = string("block_20_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_20_ffn_outproj_pad_0 = const()[name = string("block_20_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_20_ffn_outproj_dilations_0 = const()[name = string("block_20_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_20_ffn_outproj_groups_0 = const()[name = string("block_20_ffn_outproj_groups_0"), val = int32(1)]; tensor block_20_ffn_outproj = conv(dilations = block_20_ffn_outproj_dilations_0, groups = block_20_ffn_outproj_groups_0, pad = block_20_ffn_outproj_pad_0, pad_type = block_20_ffn_outproj_pad_type_0, strides = block_20_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_99, x = block_20_ffn_x_gated)[name = string("block_20_ffn_outproj")]; tensor block_20_residual_2 = add(x = block_20_ffn_outproj, y = block_20_residual_1)[name = string("block_20_residual_2")]; tensor block_21_attention_rmsnorm_abs = abs(x = block_20_residual_2)[name = string("block_21_attention_rmsnorm_abs")]; tensor block_21_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_21_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_21_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_21_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_21_attention_rmsnorm_maxval = reduce_max(axes = block_21_attention_rmsnorm_maxval_axes_0, keep_dims = block_21_attention_rmsnorm_maxval_keep_dims_0, x = block_21_attention_rmsnorm_abs)[name = string("block_21_attention_rmsnorm_maxval")]; fp16 block_21_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_21_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_21_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_21_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; 
tensor block_21_attention_rmsnorm_maxval_clipped = clip(alpha = block_21_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_21_attention_rmsnorm_maxval_clipped_beta_0, x = block_21_attention_rmsnorm_maxval)[name = string("block_21_attention_rmsnorm_maxval_clipped")]; tensor block_21_attention_rmsnorm_scaled = real_div(x = block_20_residual_2, y = block_21_attention_rmsnorm_maxval_clipped)[name = string("block_21_attention_rmsnorm_scaled")]; tensor block_21_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_21_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_21_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_21_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_21_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_21_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_21_attention_rmsnorm_squared_sum_keep_dims_0, x = block_21_attention_rmsnorm_scaled)[name = string("block_21_attention_rmsnorm_squared_sum")]; fp16 block_21_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_21_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_21_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_21_attention_rmsnorm_rsqrt_epsilon_0, x = block_21_attention_rmsnorm_squared_sum)[name = string("block_21_attention_rmsnorm_rsqrt")]; fp16 block_21_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_21_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_21_attention_rmsnorm_dim_scaled = mul(x = block_21_attention_rmsnorm_scaled, y = block_21_attention_rmsnorm_dim_scaled_y_0)[name = string("block_21_attention_rmsnorm_dim_scaled")]; tensor block_21_attention_rmsnorm_normalized = mul(x = block_21_attention_rmsnorm_dim_scaled, y = block_21_attention_rmsnorm_rsqrt)[name = string("block_21_attention_rmsnorm_normalized")]; tensor block_21_attention_rmsnorm_y_0 = const()[name = string("block_21_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535149632)))]; tensor block_21_attention_rmsnorm = mul(x = block_21_attention_rmsnorm_normalized, y = block_21_attention_rmsnorm_y_0)[name = string("block_21_attention_rmsnorm")]; tensor attention_21_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535151488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535925696))))[name = string("attention_21_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_100 = constexpr_blockwise_shift_scale(data = attention_21_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535962624))))[name = string("constexpr_blockwise_shift_scale_100")]; tensor attention_21_qkvproj_bias_0 = const()[name = string("attention_21_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535964992)))]; tensor attention_21_qkvproj_strides_0 = const()[name = string("attention_21_qkvproj_strides_0"), val = tensor([1])]; string attention_21_qkvproj_pad_type_0 = const()[name = string("attention_21_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_21_qkvproj_pad_0 = const()[name = string("attention_21_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_21_qkvproj_dilations_0 = const()[name = string("attention_21_qkvproj_dilations_0"), val = tensor([1])]; 
int32 attention_21_qkvproj_groups_0 = const()[name = string("attention_21_qkvproj_groups_0"), val = int32(1)]; tensor attention_21_qkvproj = conv(bias = attention_21_qkvproj_bias_0, dilations = attention_21_qkvproj_dilations_0, groups = attention_21_qkvproj_groups_0, pad = attention_21_qkvproj_pad_0, pad_type = attention_21_qkvproj_pad_type_0, strides = attention_21_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_100, x = block_21_attention_rmsnorm)[name = string("attention_21_qkvproj")]; tensor attention_21_head_reshape_shape_0 = const()[name = string("attention_21_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_21_head_reshape = reshape(shape = attention_21_head_reshape_shape_0, x = attention_21_qkvproj)[name = string("attention_21_head_reshape")]; tensor attention_21_head_transpose_perm_0 = const()[name = string("attention_21_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_21_split_qkv_heads_axis_0 = const()[name = string("attention_21_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_21_split_qkv_heads_split_sizes_0 = const()[name = string("attention_21_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_21_head_transpose = transpose(perm = attention_21_head_transpose_perm_0, x = attention_21_head_reshape)[name = string("transpose_6")]; tensor attention_21_split_qkv_heads_0, tensor attention_21_split_qkv_heads_1, tensor attention_21_split_qkv_heads_2 = split(axis = attention_21_split_qkv_heads_axis_0, split_sizes = attention_21_split_qkv_heads_split_sizes_0, x = attention_21_head_transpose)[name = string("attention_21_split_qkv_heads")]; tensor attention_21_q_rope_lhs_mult = mul(x = attention_21_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_21_q_rope_lhs_mult")]; int32 attention_21_q_rotate_half_split_num_splits_0 = const()[name = string("attention_21_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_21_q_rotate_half_split_axis_0 = const()[name = string("attention_21_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_21_q_rotate_half_split_0, tensor attention_21_q_rotate_half_split_1 = split(axis = attention_21_q_rotate_half_split_axis_0, num_splits = attention_21_q_rotate_half_split_num_splits_0, x = attention_21_split_qkv_heads_0)[name = string("attention_21_q_rotate_half_split")]; fp16 attention_21_q_rotate_half_neg_y_0 = const()[name = string("attention_21_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_21_q_rotate_half_neg = mul(x = attention_21_q_rotate_half_split_1, y = attention_21_q_rotate_half_neg_y_0)[name = string("attention_21_q_rotate_half_neg")]; int32 attention_21_q_rotate_half_concat_axis_0 = const()[name = string("attention_21_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_21_q_rotate_half_concat_interleave_0 = const()[name = string("attention_21_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_21_q_rotate_half_concat = concat(axis = attention_21_q_rotate_half_concat_axis_0, interleave = attention_21_q_rotate_half_concat_interleave_0, values = (attention_21_q_rotate_half_neg, attention_21_q_rotate_half_split_0))[name = string("attention_21_q_rotate_half_concat")]; tensor attention_21_q_rope_rhs_mult = mul(x = attention_21_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_21_q_rope_rhs_mult")]; tensor attention_21_q_rope = add(x = attention_21_q_rope_lhs_mult, y = attention_21_q_rope_rhs_mult)[name = string("attention_21_q_rope")]; tensor 
attention_21_k_rope_lhs_mult = mul(x = attention_21_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_21_k_rope_lhs_mult")]; int32 attention_21_k_rotate_half_split_num_splits_0 = const()[name = string("attention_21_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_21_k_rotate_half_split_axis_0 = const()[name = string("attention_21_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_21_k_rotate_half_split_0, tensor attention_21_k_rotate_half_split_1 = split(axis = attention_21_k_rotate_half_split_axis_0, num_splits = attention_21_k_rotate_half_split_num_splits_0, x = attention_21_split_qkv_heads_1)[name = string("attention_21_k_rotate_half_split")]; fp16 attention_21_k_rotate_half_neg_y_0 = const()[name = string("attention_21_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_21_k_rotate_half_neg = mul(x = attention_21_k_rotate_half_split_1, y = attention_21_k_rotate_half_neg_y_0)[name = string("attention_21_k_rotate_half_neg")]; int32 attention_21_k_rotate_half_concat_axis_0 = const()[name = string("attention_21_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_21_k_rotate_half_concat_interleave_0 = const()[name = string("attention_21_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_21_k_rotate_half_concat = concat(axis = attention_21_k_rotate_half_concat_axis_0, interleave = attention_21_k_rotate_half_concat_interleave_0, values = (attention_21_k_rotate_half_neg, attention_21_k_rotate_half_split_0))[name = string("attention_21_k_rotate_half_concat")]; tensor attention_21_k_rope_rhs_mult = mul(x = attention_21_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_21_k_rope_rhs_mult")]; tensor attention_21_k_rope = add(x = attention_21_k_rope_lhs_mult, y = attention_21_k_rope_rhs_mult)[name = string("attention_21_k_rope")]; int32 attention_21_q_splits_axis_0 = const()[name = string("attention_21_q_splits_axis_0"), val = int32(1)]; int32 attention_21_q_splits_num_splits_0 = const()[name = string("attention_21_q_splits_num_splits_0"), val = int32(2)]; tensor attention_21_q_splits_0, tensor attention_21_q_splits_1 = split(axis = attention_21_q_splits_axis_0, num_splits = attention_21_q_splits_num_splits_0, x = attention_21_q_rope)[name = string("attention_21_q_splits")]; tensor attention_21_update_begin_0_values0_0 = const()[name = string("attention_21_update_begin_0_values0_0"), val = tensor([21])]; tensor attention_21_update_begin_0_values1_0 = const()[name = string("attention_21_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_21_update_begin_0_values3_0 = const()[name = string("attention_21_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_21_update_begin_0_axis_0 = const()[name = string("attention_21_update_begin_0_axis_0"), val = int32(0)]; bool attention_21_update_begin_0_interleave_0 = const()[name = string("attention_21_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_21_update_begin_0 = concat(axis = attention_21_update_begin_0_axis_0, interleave = attention_21_update_begin_0_interleave_0, values = (attention_21_update_begin_0_values0_0, attention_21_update_begin_0_values1_0, query_pos1, attention_21_update_begin_0_values3_0))[name = string("attention_21_update_begin_0")]; tensor attention_21_update_end_0_values0_0 = const()[name = string("attention_21_update_end_0_values0_0"), val = tensor([22])]; tensor attention_21_update_end_0_values1_0 = const()[name = string("attention_21_update_end_0_values1_0"), val = tensor([2])]; tensor 
attention_21_update_end_0_values3_0 = const()[name = string("attention_21_update_end_0_values3_0"), val = tensor([64])]; int32 attention_21_update_end_0_axis_0 = const()[name = string("attention_21_update_end_0_axis_0"), val = int32(0)]; bool attention_21_update_end_0_interleave_0 = const()[name = string("attention_21_update_end_0_interleave_0"), val = bool(false)]; tensor attention_21_update_end_0 = concat(axis = attention_21_update_end_0_axis_0, interleave = attention_21_update_end_0_interleave_0, values = (attention_21_update_end_0_values0_0, attention_21_update_end_0_values1_0, end_pos_0, attention_21_update_end_0_values3_0))[name = string("attention_21_update_end_0")]; tensor attention_21_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_21_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_21_updated_key_cache_0 = slice_update(begin = attention_21_update_begin_0, end = attention_21_update_end_0, squeeze_mask = attention_21_updated_key_cache_0_squeeze_mask_0, update = attention_21_k_rope, x = coreml_update_state_40)[name = string("attention_21_updated_key_cache_0")]; write_state(data = attention_21_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_42_write_state")]; tensor coreml_update_state_42 = read_state(input = key_cache_state)[name = string("coreml_update_state_42")]; tensor attention_21_key_cache_begin_0 = const()[name = string("attention_21_key_cache_begin_0"), val = tensor([21, 0, 0, 0])]; tensor attention_21_key_cache_end_0 = const()[name = string("attention_21_key_cache_end_0"), val = tensor([22, 2, 512, 64])]; tensor attention_21_key_cache_squeeze_mask_0 = const()[name = string("attention_21_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_21_key_cache = slice_by_index(begin = attention_21_key_cache_begin_0, end = attention_21_key_cache_end_0, squeeze_mask = attention_21_key_cache_squeeze_mask_0, x = coreml_update_state_42)[name = string("attention_21_key_cache")]; int32 attention_21_key_cache_head_axis_0 = const()[name = string("attention_21_key_cache_head_axis_0"), val = int32(1)]; int32 attention_21_key_cache_head_num_splits_0 = const()[name = string("attention_21_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_21_key_cache_head_0, tensor attention_21_key_cache_head_1 = split(axis = attention_21_key_cache_head_axis_0, num_splits = attention_21_key_cache_head_num_splits_0, x = attention_21_key_cache)[name = string("attention_21_key_cache_head")]; tensor attention_21_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_21_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_21_updated_value_cache_0 = slice_update(begin = attention_21_update_begin_0, end = attention_21_update_end_0, squeeze_mask = attention_21_updated_value_cache_0_squeeze_mask_0, update = attention_21_split_qkv_heads_2, x = coreml_update_state_41)[name = string("attention_21_updated_value_cache_0")]; write_state(data = attention_21_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_43_write_state")]; tensor coreml_update_state_43 = read_state(input = value_cache_state)[name = string("coreml_update_state_43")]; tensor attention_21_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_21_slice_current_layer_value_cache_begin_0"), val = tensor([21, 0, 0, 0])]; tensor attention_21_slice_current_layer_value_cache_end_0 = const()[name 
= string("attention_21_slice_current_layer_value_cache_end_0"), val = tensor([22, 2, 512, 64])]; tensor attention_21_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_21_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_21_slice_current_layer_value_cache = slice_by_index(begin = attention_21_slice_current_layer_value_cache_begin_0, end = attention_21_slice_current_layer_value_cache_end_0, squeeze_mask = attention_21_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_43)[name = string("attention_21_slice_current_layer_value_cache")]; int32 attention_21_slice_value_cache_heads_axis_0 = const()[name = string("attention_21_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_21_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_21_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_21_slice_value_cache_heads_0, tensor attention_21_slice_value_cache_heads_1 = split(axis = attention_21_slice_value_cache_heads_axis_0, num_splits = attention_21_slice_value_cache_heads_num_splits_0, x = attention_21_slice_current_layer_value_cache)[name = string("attention_21_slice_value_cache_heads")]; bool attention_21_scores_0_transpose_y_0 = const()[name = string("attention_21_scores_0_transpose_y_0"), val = bool(true)]; bool attention_21_scores_0_transpose_x_0 = const()[name = string("attention_21_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_21_scores_0 = matmul(transpose_x = attention_21_scores_0_transpose_x_0, transpose_y = attention_21_scores_0_transpose_y_0, x = attention_21_key_cache_head_0, y = attention_21_q_splits_0)[name = string("attention_21_scores_0")]; fp16 attention_21_scaled_scores_0_y_0 = const()[name = string("attention_21_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_21_scaled_scores_0 = mul(x = attention_21_scores_0, y = attention_21_scaled_scores_0_y_0)[name = string("attention_21_scaled_scores_0")]; tensor attention_21_masked_scaled_scores_0 = add(x = attention_21_scaled_scores_0, y = transpose_0)[name = string("attention_21_masked_scaled_scores_0")]; int32 softmax_42_axis_0 = const()[name = string("softmax_42_axis_0"), val = int32(-2)]; tensor softmax_42 = softmax(axis = softmax_42_axis_0, x = attention_21_masked_scaled_scores_0)[name = string("softmax_42")]; bool attention_21_attention_0_transpose_x_0 = const()[name = string("attention_21_attention_0_transpose_x_0"), val = bool(true)]; bool attention_21_attention_0_transpose_y_0 = const()[name = string("attention_21_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_21_attention_0 = matmul(transpose_x = attention_21_attention_0_transpose_x_0, transpose_y = attention_21_attention_0_transpose_y_0, x = softmax_42, y = attention_21_slice_value_cache_heads_0)[name = string("attention_21_attention_0")]; bool attention_21_scores_1_transpose_y_0 = const()[name = string("attention_21_scores_1_transpose_y_0"), val = bool(true)]; bool attention_21_scores_1_transpose_x_0 = const()[name = string("attention_21_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_21_scores_1 = matmul(transpose_x = attention_21_scores_1_transpose_x_0, transpose_y = attention_21_scores_1_transpose_y_0, x = attention_21_key_cache_head_1, y = attention_21_q_splits_1)[name = string("attention_21_scores_1")]; fp16 attention_21_scaled_scores_1_y_0 = const()[name = string("attention_21_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor 
attention_21_scaled_scores_1 = mul(x = attention_21_scores_1, y = attention_21_scaled_scores_1_y_0)[name = string("attention_21_scaled_scores_1")]; tensor attention_21_masked_scaled_scores_1 = add(x = attention_21_scaled_scores_1, y = transpose_0)[name = string("attention_21_masked_scaled_scores_1")]; int32 softmax_43_axis_0 = const()[name = string("softmax_43_axis_0"), val = int32(-2)]; tensor softmax_43 = softmax(axis = softmax_43_axis_0, x = attention_21_masked_scaled_scores_1)[name = string("softmax_43")]; bool attention_21_attention_1_transpose_x_0 = const()[name = string("attention_21_attention_1_transpose_x_0"), val = bool(true)]; bool attention_21_attention_1_transpose_y_0 = const()[name = string("attention_21_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_21_attention_1 = matmul(transpose_x = attention_21_attention_1_transpose_x_0, transpose_y = attention_21_attention_1_transpose_y_0, x = softmax_43, y = attention_21_slice_value_cache_heads_1)[name = string("attention_21_attention_1")]; int32 attention_21_concat_attention_all_heads_axis_0 = const()[name = string("attention_21_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_21_concat_attention_all_heads_interleave_0 = const()[name = string("attention_21_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_21_concat_attention_all_heads = concat(axis = attention_21_concat_attention_all_heads_axis_0, interleave = attention_21_concat_attention_all_heads_interleave_0, values = (attention_21_attention_0, attention_21_attention_1))[name = string("attention_21_concat_attention_all_heads")]; tensor attention_21_channels_first_retransposed_perm_0 = const()[name = string("attention_21_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_21_reshaped_shape_0 = const()[name = string("attention_21_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_21_channels_first_retransposed = transpose(perm = attention_21_channels_first_retransposed_perm_0, x = attention_21_concat_attention_all_heads)[name = string("transpose_5")]; tensor attention_21_reshaped = reshape(shape = attention_21_reshaped_shape_0, x = attention_21_channels_first_retransposed)[name = string("attention_21_reshaped")]; tensor attention_21_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535967360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536569536))))[name = string("attention_21_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_101 = constexpr_blockwise_shift_scale(data = attention_21_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536598272))))[name = string("constexpr_blockwise_shift_scale_101")]; tensor attention_21_outproj_strides_0 = const()[name = string("attention_21_outproj_strides_0"), val = tensor([1])]; string attention_21_outproj_pad_type_0 = const()[name = string("attention_21_outproj_pad_type_0"), val = string("valid")]; tensor attention_21_outproj_pad_0 = const()[name = string("attention_21_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_21_outproj_dilations_0 = const()[name = string("attention_21_outproj_dilations_0"), val = tensor([1])]; int32 attention_21_outproj_groups_0 = const()[name = string("attention_21_outproj_groups_0"), val = int32(1)]; tensor attention_21_outproj = conv(dilations = 
attention_21_outproj_dilations_0, groups = attention_21_outproj_groups_0, pad = attention_21_outproj_pad_0, pad_type = attention_21_outproj_pad_type_0, strides = attention_21_outproj_strides_0, weight = constexpr_blockwise_shift_scale_101, x = attention_21_reshaped)[name = string("attention_21_outproj")]; tensor block_21_residual_1 = add(x = block_20_residual_2, y = attention_21_outproj)[name = string("block_21_residual_1")]; tensor block_21_ffn_rmsnorm_abs = abs(x = block_21_residual_1)[name = string("block_21_ffn_rmsnorm_abs")]; tensor block_21_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_21_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_21_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_21_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_21_ffn_rmsnorm_maxval = reduce_max(axes = block_21_ffn_rmsnorm_maxval_axes_0, keep_dims = block_21_ffn_rmsnorm_maxval_keep_dims_0, x = block_21_ffn_rmsnorm_abs)[name = string("block_21_ffn_rmsnorm_maxval")]; fp16 block_21_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_21_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_21_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_21_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_21_ffn_rmsnorm_maxval_clipped = clip(alpha = block_21_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_21_ffn_rmsnorm_maxval_clipped_beta_0, x = block_21_ffn_rmsnorm_maxval)[name = string("block_21_ffn_rmsnorm_maxval_clipped")]; tensor block_21_ffn_rmsnorm_scaled = real_div(x = block_21_residual_1, y = block_21_ffn_rmsnorm_maxval_clipped)[name = string("block_21_ffn_rmsnorm_scaled")]; tensor block_21_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_21_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_21_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_21_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_21_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_21_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_21_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_21_ffn_rmsnorm_scaled)[name = string("block_21_ffn_rmsnorm_squared_sum")]; fp16 block_21_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_21_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_21_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_21_ffn_rmsnorm_rsqrt_epsilon_0, x = block_21_ffn_rmsnorm_squared_sum)[name = string("block_21_ffn_rmsnorm_rsqrt")]; fp16 block_21_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_21_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_21_ffn_rmsnorm_dim_scaled = mul(x = block_21_ffn_rmsnorm_scaled, y = block_21_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_21_ffn_rmsnorm_dim_scaled")]; tensor block_21_ffn_rmsnorm_normalized = mul(x = block_21_ffn_rmsnorm_dim_scaled, y = block_21_ffn_rmsnorm_rsqrt)[name = string("block_21_ffn_rmsnorm_normalized")]; tensor block_21_ffn_rmsnorm_y_0 = const()[name = string("block_21_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536600128)))]; tensor block_21_ffn_rmsnorm = mul(x = block_21_ffn_rmsnorm_normalized, y = block_21_ffn_rmsnorm_y_0)[name = string("block_21_ffn_rmsnorm")]; tensor block_21_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536601984))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(539870656))))[name = string("block_21_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_102 = constexpr_blockwise_shift_scale(data = block_21_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540026368))))[name = string("constexpr_blockwise_shift_scale_102")]; tensor block_21_ffn_inproj_strides_0 = const()[name = string("block_21_ffn_inproj_strides_0"), val = tensor([1])]; string block_21_ffn_inproj_pad_type_0 = const()[name = string("block_21_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_21_ffn_inproj_pad_0 = const()[name = string("block_21_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_21_ffn_inproj_dilations_0 = const()[name = string("block_21_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_21_ffn_inproj_groups_0 = const()[name = string("block_21_ffn_inproj_groups_0"), val = int32(1)]; tensor block_21_ffn_inproj = conv(dilations = block_21_ffn_inproj_dilations_0, groups = block_21_ffn_inproj_groups_0, pad = block_21_ffn_inproj_pad_0, pad_type = block_21_ffn_inproj_pad_type_0, strides = block_21_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_102, x = block_21_ffn_rmsnorm)[name = string("block_21_ffn_inproj")]; tensor block_21_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540036160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543304832))))[name = string("block_21_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_103 = constexpr_blockwise_shift_scale(data = block_21_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543460544))))[name = string("constexpr_blockwise_shift_scale_103")]; tensor block_21_ffn_g_strides_0 = const()[name = string("block_21_ffn_g_strides_0"), val = tensor([1])]; string block_21_ffn_g_pad_type_0 = const()[name = string("block_21_ffn_g_pad_type_0"), val = string("valid")]; tensor block_21_ffn_g_pad_0 = const()[name = string("block_21_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_21_ffn_g_dilations_0 = const()[name = string("block_21_ffn_g_dilations_0"), val = tensor([1])]; int32 block_21_ffn_g_groups_0 = const()[name = string("block_21_ffn_g_groups_0"), val = int32(1)]; tensor block_21_ffn_g = conv(dilations = block_21_ffn_g_dilations_0, groups = block_21_ffn_g_groups_0, pad = block_21_ffn_g_pad_0, pad_type = block_21_ffn_g_pad_type_0, strides = block_21_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_103, x = block_21_ffn_rmsnorm)[name = string("block_21_ffn_g")]; tensor block_21_ffn_g_activation = silu(x = block_21_ffn_g)[name = string("block_21_ffn_g_activation")]; tensor block_21_ffn_x_gated = mul(x = block_21_ffn_inproj, y = block_21_ffn_g_activation)[name = string("block_21_ffn_x_gated")]; tensor block_21_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543470336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546739008))))[name = string("block_21_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_104 = constexpr_blockwise_shift_scale(data = block_21_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(546767744))))[name = string("constexpr_blockwise_shift_scale_104")]; tensor block_21_ffn_outproj_strides_0 = const()[name = string("block_21_ffn_outproj_strides_0"), val = tensor([1])]; string block_21_ffn_outproj_pad_type_0 = const()[name = string("block_21_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_21_ffn_outproj_pad_0 = const()[name = string("block_21_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_21_ffn_outproj_dilations_0 = const()[name = string("block_21_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_21_ffn_outproj_groups_0 = const()[name = string("block_21_ffn_outproj_groups_0"), val = int32(1)]; tensor block_21_ffn_outproj = conv(dilations = block_21_ffn_outproj_dilations_0, groups = block_21_ffn_outproj_groups_0, pad = block_21_ffn_outproj_pad_0, pad_type = block_21_ffn_outproj_pad_type_0, strides = block_21_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_104, x = block_21_ffn_x_gated)[name = string("block_21_ffn_outproj")]; tensor block_21_residual_2 = add(x = block_21_ffn_outproj, y = block_21_residual_1)[name = string("block_21_residual_2")]; tensor block_22_attention_rmsnorm_abs = abs(x = block_21_residual_2)[name = string("block_22_attention_rmsnorm_abs")]; tensor block_22_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_22_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_22_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_22_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_22_attention_rmsnorm_maxval = reduce_max(axes = block_22_attention_rmsnorm_maxval_axes_0, keep_dims = block_22_attention_rmsnorm_maxval_keep_dims_0, x = block_22_attention_rmsnorm_abs)[name = string("block_22_attention_rmsnorm_maxval")]; fp16 block_22_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_22_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_22_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_22_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_22_attention_rmsnorm_maxval_clipped = clip(alpha = block_22_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_22_attention_rmsnorm_maxval_clipped_beta_0, x = block_22_attention_rmsnorm_maxval)[name = string("block_22_attention_rmsnorm_maxval_clipped")]; tensor block_22_attention_rmsnorm_scaled = real_div(x = block_21_residual_2, y = block_22_attention_rmsnorm_maxval_clipped)[name = string("block_22_attention_rmsnorm_scaled")]; tensor block_22_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_22_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_22_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_22_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_22_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_22_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_22_attention_rmsnorm_squared_sum_keep_dims_0, x = block_22_attention_rmsnorm_scaled)[name = string("block_22_attention_rmsnorm_squared_sum")]; fp16 block_22_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_22_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_22_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_22_attention_rmsnorm_rsqrt_epsilon_0, x = block_22_attention_rmsnorm_squared_sum)[name = string("block_22_attention_rmsnorm_rsqrt")]; fp16 
block_22_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_22_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_22_attention_rmsnorm_dim_scaled = mul(x = block_22_attention_rmsnorm_scaled, y = block_22_attention_rmsnorm_dim_scaled_y_0)[name = string("block_22_attention_rmsnorm_dim_scaled")]; tensor block_22_attention_rmsnorm_normalized = mul(x = block_22_attention_rmsnorm_dim_scaled, y = block_22_attention_rmsnorm_rsqrt)[name = string("block_22_attention_rmsnorm_normalized")]; tensor block_22_attention_rmsnorm_y_0 = const()[name = string("block_22_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546769600)))]; tensor block_22_attention_rmsnorm = mul(x = block_22_attention_rmsnorm_normalized, y = block_22_attention_rmsnorm_y_0)[name = string("block_22_attention_rmsnorm")]; tensor attention_22_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546771456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547545664))))[name = string("attention_22_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_105 = constexpr_blockwise_shift_scale(data = attention_22_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547582592))))[name = string("constexpr_blockwise_shift_scale_105")]; tensor attention_22_qkvproj_bias_0 = const()[name = string("attention_22_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547584960)))]; tensor attention_22_qkvproj_strides_0 = const()[name = string("attention_22_qkvproj_strides_0"), val = tensor([1])]; string attention_22_qkvproj_pad_type_0 = const()[name = string("attention_22_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_22_qkvproj_pad_0 = const()[name = string("attention_22_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_22_qkvproj_dilations_0 = const()[name = string("attention_22_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_22_qkvproj_groups_0 = const()[name = string("attention_22_qkvproj_groups_0"), val = int32(1)]; tensor attention_22_qkvproj = conv(bias = attention_22_qkvproj_bias_0, dilations = attention_22_qkvproj_dilations_0, groups = attention_22_qkvproj_groups_0, pad = attention_22_qkvproj_pad_0, pad_type = attention_22_qkvproj_pad_type_0, strides = attention_22_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_105, x = block_22_attention_rmsnorm)[name = string("attention_22_qkvproj")]; tensor attention_22_head_reshape_shape_0 = const()[name = string("attention_22_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_22_head_reshape = reshape(shape = attention_22_head_reshape_shape_0, x = attention_22_qkvproj)[name = string("attention_22_head_reshape")]; tensor attention_22_head_transpose_perm_0 = const()[name = string("attention_22_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_22_split_qkv_heads_axis_0 = const()[name = string("attention_22_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_22_split_qkv_heads_split_sizes_0 = const()[name = string("attention_22_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_22_head_transpose = transpose(perm = attention_22_head_transpose_perm_0, x = attention_22_head_reshape)[name = string("transpose_4")]; tensor 
attention_22_split_qkv_heads_0, tensor attention_22_split_qkv_heads_1, tensor attention_22_split_qkv_heads_2 = split(axis = attention_22_split_qkv_heads_axis_0, split_sizes = attention_22_split_qkv_heads_split_sizes_0, x = attention_22_head_transpose)[name = string("attention_22_split_qkv_heads")]; tensor attention_22_q_rope_lhs_mult = mul(x = attention_22_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_22_q_rope_lhs_mult")]; int32 attention_22_q_rotate_half_split_num_splits_0 = const()[name = string("attention_22_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_22_q_rotate_half_split_axis_0 = const()[name = string("attention_22_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_22_q_rotate_half_split_0, tensor attention_22_q_rotate_half_split_1 = split(axis = attention_22_q_rotate_half_split_axis_0, num_splits = attention_22_q_rotate_half_split_num_splits_0, x = attention_22_split_qkv_heads_0)[name = string("attention_22_q_rotate_half_split")]; fp16 attention_22_q_rotate_half_neg_y_0 = const()[name = string("attention_22_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_22_q_rotate_half_neg = mul(x = attention_22_q_rotate_half_split_1, y = attention_22_q_rotate_half_neg_y_0)[name = string("attention_22_q_rotate_half_neg")]; int32 attention_22_q_rotate_half_concat_axis_0 = const()[name = string("attention_22_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_22_q_rotate_half_concat_interleave_0 = const()[name = string("attention_22_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_22_q_rotate_half_concat = concat(axis = attention_22_q_rotate_half_concat_axis_0, interleave = attention_22_q_rotate_half_concat_interleave_0, values = (attention_22_q_rotate_half_neg, attention_22_q_rotate_half_split_0))[name = string("attention_22_q_rotate_half_concat")]; tensor attention_22_q_rope_rhs_mult = mul(x = attention_22_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_22_q_rope_rhs_mult")]; tensor attention_22_q_rope = add(x = attention_22_q_rope_lhs_mult, y = attention_22_q_rope_rhs_mult)[name = string("attention_22_q_rope")]; tensor attention_22_k_rope_lhs_mult = mul(x = attention_22_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_22_k_rope_lhs_mult")]; int32 attention_22_k_rotate_half_split_num_splits_0 = const()[name = string("attention_22_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_22_k_rotate_half_split_axis_0 = const()[name = string("attention_22_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_22_k_rotate_half_split_0, tensor attention_22_k_rotate_half_split_1 = split(axis = attention_22_k_rotate_half_split_axis_0, num_splits = attention_22_k_rotate_half_split_num_splits_0, x = attention_22_split_qkv_heads_1)[name = string("attention_22_k_rotate_half_split")]; fp16 attention_22_k_rotate_half_neg_y_0 = const()[name = string("attention_22_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_22_k_rotate_half_neg = mul(x = attention_22_k_rotate_half_split_1, y = attention_22_k_rotate_half_neg_y_0)[name = string("attention_22_k_rotate_half_neg")]; int32 attention_22_k_rotate_half_concat_axis_0 = const()[name = string("attention_22_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_22_k_rotate_half_concat_interleave_0 = const()[name = string("attention_22_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_22_k_rotate_half_concat = concat(axis = attention_22_k_rotate_half_concat_axis_0, 
interleave = attention_22_k_rotate_half_concat_interleave_0, values = (attention_22_k_rotate_half_neg, attention_22_k_rotate_half_split_0))[name = string("attention_22_k_rotate_half_concat")]; tensor attention_22_k_rope_rhs_mult = mul(x = attention_22_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_22_k_rope_rhs_mult")]; tensor attention_22_k_rope = add(x = attention_22_k_rope_lhs_mult, y = attention_22_k_rope_rhs_mult)[name = string("attention_22_k_rope")]; int32 attention_22_q_splits_axis_0 = const()[name = string("attention_22_q_splits_axis_0"), val = int32(1)]; int32 attention_22_q_splits_num_splits_0 = const()[name = string("attention_22_q_splits_num_splits_0"), val = int32(2)]; tensor attention_22_q_splits_0, tensor attention_22_q_splits_1 = split(axis = attention_22_q_splits_axis_0, num_splits = attention_22_q_splits_num_splits_0, x = attention_22_q_rope)[name = string("attention_22_q_splits")]; tensor attention_22_update_begin_0_values0_0 = const()[name = string("attention_22_update_begin_0_values0_0"), val = tensor([22])]; tensor attention_22_update_begin_0_values1_0 = const()[name = string("attention_22_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_22_update_begin_0_values3_0 = const()[name = string("attention_22_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_22_update_begin_0_axis_0 = const()[name = string("attention_22_update_begin_0_axis_0"), val = int32(0)]; bool attention_22_update_begin_0_interleave_0 = const()[name = string("attention_22_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_22_update_begin_0 = concat(axis = attention_22_update_begin_0_axis_0, interleave = attention_22_update_begin_0_interleave_0, values = (attention_22_update_begin_0_values0_0, attention_22_update_begin_0_values1_0, query_pos1, attention_22_update_begin_0_values3_0))[name = string("attention_22_update_begin_0")]; tensor attention_22_update_end_0_values0_0 = const()[name = string("attention_22_update_end_0_values0_0"), val = tensor([23])]; tensor attention_22_update_end_0_values1_0 = const()[name = string("attention_22_update_end_0_values1_0"), val = tensor([2])]; tensor attention_22_update_end_0_values3_0 = const()[name = string("attention_22_update_end_0_values3_0"), val = tensor([64])]; int32 attention_22_update_end_0_axis_0 = const()[name = string("attention_22_update_end_0_axis_0"), val = int32(0)]; bool attention_22_update_end_0_interleave_0 = const()[name = string("attention_22_update_end_0_interleave_0"), val = bool(false)]; tensor attention_22_update_end_0 = concat(axis = attention_22_update_end_0_axis_0, interleave = attention_22_update_end_0_interleave_0, values = (attention_22_update_end_0_values0_0, attention_22_update_end_0_values1_0, end_pos_0, attention_22_update_end_0_values3_0))[name = string("attention_22_update_end_0")]; tensor attention_22_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_22_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_22_updated_key_cache_0 = slice_update(begin = attention_22_update_begin_0, end = attention_22_update_end_0, squeeze_mask = attention_22_updated_key_cache_0_squeeze_mask_0, update = attention_22_k_rope, x = coreml_update_state_42)[name = string("attention_22_updated_key_cache_0")]; write_state(data = attention_22_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_44_write_state")]; tensor coreml_update_state_44 = read_state(input = key_cache_state)[name = 
string("coreml_update_state_44")]; tensor attention_22_key_cache_begin_0 = const()[name = string("attention_22_key_cache_begin_0"), val = tensor([22, 0, 0, 0])]; tensor attention_22_key_cache_end_0 = const()[name = string("attention_22_key_cache_end_0"), val = tensor([23, 2, 512, 64])]; tensor attention_22_key_cache_squeeze_mask_0 = const()[name = string("attention_22_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_22_key_cache = slice_by_index(begin = attention_22_key_cache_begin_0, end = attention_22_key_cache_end_0, squeeze_mask = attention_22_key_cache_squeeze_mask_0, x = coreml_update_state_44)[name = string("attention_22_key_cache")]; int32 attention_22_key_cache_head_axis_0 = const()[name = string("attention_22_key_cache_head_axis_0"), val = int32(1)]; int32 attention_22_key_cache_head_num_splits_0 = const()[name = string("attention_22_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_22_key_cache_head_0, tensor attention_22_key_cache_head_1 = split(axis = attention_22_key_cache_head_axis_0, num_splits = attention_22_key_cache_head_num_splits_0, x = attention_22_key_cache)[name = string("attention_22_key_cache_head")]; tensor attention_22_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_22_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_22_updated_value_cache_0 = slice_update(begin = attention_22_update_begin_0, end = attention_22_update_end_0, squeeze_mask = attention_22_updated_value_cache_0_squeeze_mask_0, update = attention_22_split_qkv_heads_2, x = coreml_update_state_43)[name = string("attention_22_updated_value_cache_0")]; write_state(data = attention_22_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_45_write_state")]; tensor coreml_update_state_45 = read_state(input = value_cache_state)[name = string("coreml_update_state_45")]; tensor attention_22_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_22_slice_current_layer_value_cache_begin_0"), val = tensor([22, 0, 0, 0])]; tensor attention_22_slice_current_layer_value_cache_end_0 = const()[name = string("attention_22_slice_current_layer_value_cache_end_0"), val = tensor([23, 2, 512, 64])]; tensor attention_22_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_22_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_22_slice_current_layer_value_cache = slice_by_index(begin = attention_22_slice_current_layer_value_cache_begin_0, end = attention_22_slice_current_layer_value_cache_end_0, squeeze_mask = attention_22_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_45)[name = string("attention_22_slice_current_layer_value_cache")]; int32 attention_22_slice_value_cache_heads_axis_0 = const()[name = string("attention_22_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_22_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_22_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_22_slice_value_cache_heads_0, tensor attention_22_slice_value_cache_heads_1 = split(axis = attention_22_slice_value_cache_heads_axis_0, num_splits = attention_22_slice_value_cache_heads_num_splits_0, x = attention_22_slice_current_layer_value_cache)[name = string("attention_22_slice_value_cache_heads")]; bool attention_22_scores_0_transpose_y_0 = const()[name = 
string("attention_22_scores_0_transpose_y_0"), val = bool(true)]; bool attention_22_scores_0_transpose_x_0 = const()[name = string("attention_22_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_22_scores_0 = matmul(transpose_x = attention_22_scores_0_transpose_x_0, transpose_y = attention_22_scores_0_transpose_y_0, x = attention_22_key_cache_head_0, y = attention_22_q_splits_0)[name = string("attention_22_scores_0")]; fp16 attention_22_scaled_scores_0_y_0 = const()[name = string("attention_22_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_22_scaled_scores_0 = mul(x = attention_22_scores_0, y = attention_22_scaled_scores_0_y_0)[name = string("attention_22_scaled_scores_0")]; tensor attention_22_masked_scaled_scores_0 = add(x = attention_22_scaled_scores_0, y = transpose_0)[name = string("attention_22_masked_scaled_scores_0")]; int32 softmax_44_axis_0 = const()[name = string("softmax_44_axis_0"), val = int32(-2)]; tensor softmax_44 = softmax(axis = softmax_44_axis_0, x = attention_22_masked_scaled_scores_0)[name = string("softmax_44")]; bool attention_22_attention_0_transpose_x_0 = const()[name = string("attention_22_attention_0_transpose_x_0"), val = bool(true)]; bool attention_22_attention_0_transpose_y_0 = const()[name = string("attention_22_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_22_attention_0 = matmul(transpose_x = attention_22_attention_0_transpose_x_0, transpose_y = attention_22_attention_0_transpose_y_0, x = softmax_44, y = attention_22_slice_value_cache_heads_0)[name = string("attention_22_attention_0")]; bool attention_22_scores_1_transpose_y_0 = const()[name = string("attention_22_scores_1_transpose_y_0"), val = bool(true)]; bool attention_22_scores_1_transpose_x_0 = const()[name = string("attention_22_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_22_scores_1 = matmul(transpose_x = attention_22_scores_1_transpose_x_0, transpose_y = attention_22_scores_1_transpose_y_0, x = attention_22_key_cache_head_1, y = attention_22_q_splits_1)[name = string("attention_22_scores_1")]; fp16 attention_22_scaled_scores_1_y_0 = const()[name = string("attention_22_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_22_scaled_scores_1 = mul(x = attention_22_scores_1, y = attention_22_scaled_scores_1_y_0)[name = string("attention_22_scaled_scores_1")]; tensor attention_22_masked_scaled_scores_1 = add(x = attention_22_scaled_scores_1, y = transpose_0)[name = string("attention_22_masked_scaled_scores_1")]; int32 softmax_45_axis_0 = const()[name = string("softmax_45_axis_0"), val = int32(-2)]; tensor softmax_45 = softmax(axis = softmax_45_axis_0, x = attention_22_masked_scaled_scores_1)[name = string("softmax_45")]; bool attention_22_attention_1_transpose_x_0 = const()[name = string("attention_22_attention_1_transpose_x_0"), val = bool(true)]; bool attention_22_attention_1_transpose_y_0 = const()[name = string("attention_22_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_22_attention_1 = matmul(transpose_x = attention_22_attention_1_transpose_x_0, transpose_y = attention_22_attention_1_transpose_y_0, x = softmax_45, y = attention_22_slice_value_cache_heads_1)[name = string("attention_22_attention_1")]; int32 attention_22_concat_attention_all_heads_axis_0 = const()[name = string("attention_22_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_22_concat_attention_all_heads_interleave_0 = const()[name = string("attention_22_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor 
attention_22_concat_attention_all_heads = concat(axis = attention_22_concat_attention_all_heads_axis_0, interleave = attention_22_concat_attention_all_heads_interleave_0, values = (attention_22_attention_0, attention_22_attention_1))[name = string("attention_22_concat_attention_all_heads")]; tensor attention_22_channels_first_retransposed_perm_0 = const()[name = string("attention_22_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_22_reshaped_shape_0 = const()[name = string("attention_22_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_22_channels_first_retransposed = transpose(perm = attention_22_channels_first_retransposed_perm_0, x = attention_22_concat_attention_all_heads)[name = string("transpose_3")]; tensor attention_22_reshaped = reshape(shape = attention_22_reshaped_shape_0, x = attention_22_channels_first_retransposed)[name = string("attention_22_reshaped")]; tensor attention_22_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547587328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548189504))))[name = string("attention_22_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_106 = constexpr_blockwise_shift_scale(data = attention_22_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548218240))))[name = string("constexpr_blockwise_shift_scale_106")]; tensor attention_22_outproj_strides_0 = const()[name = string("attention_22_outproj_strides_0"), val = tensor([1])]; string attention_22_outproj_pad_type_0 = const()[name = string("attention_22_outproj_pad_type_0"), val = string("valid")]; tensor attention_22_outproj_pad_0 = const()[name = string("attention_22_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_22_outproj_dilations_0 = const()[name = string("attention_22_outproj_dilations_0"), val = tensor([1])]; int32 attention_22_outproj_groups_0 = const()[name = string("attention_22_outproj_groups_0"), val = int32(1)]; tensor attention_22_outproj = conv(dilations = attention_22_outproj_dilations_0, groups = attention_22_outproj_groups_0, pad = attention_22_outproj_pad_0, pad_type = attention_22_outproj_pad_type_0, strides = attention_22_outproj_strides_0, weight = constexpr_blockwise_shift_scale_106, x = attention_22_reshaped)[name = string("attention_22_outproj")]; tensor block_22_residual_1 = add(x = block_21_residual_2, y = attention_22_outproj)[name = string("block_22_residual_1")]; tensor block_22_ffn_rmsnorm_abs = abs(x = block_22_residual_1)[name = string("block_22_ffn_rmsnorm_abs")]; tensor block_22_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_22_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_22_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_22_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_22_ffn_rmsnorm_maxval = reduce_max(axes = block_22_ffn_rmsnorm_maxval_axes_0, keep_dims = block_22_ffn_rmsnorm_maxval_keep_dims_0, x = block_22_ffn_rmsnorm_abs)[name = string("block_22_ffn_rmsnorm_maxval")]; fp16 block_22_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_22_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_22_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_22_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_22_ffn_rmsnorm_maxval_clipped = clip(alpha = 
block_22_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_22_ffn_rmsnorm_maxval_clipped_beta_0, x = block_22_ffn_rmsnorm_maxval)[name = string("block_22_ffn_rmsnorm_maxval_clipped")]; tensor block_22_ffn_rmsnorm_scaled = real_div(x = block_22_residual_1, y = block_22_ffn_rmsnorm_maxval_clipped)[name = string("block_22_ffn_rmsnorm_scaled")]; tensor block_22_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_22_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_22_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_22_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_22_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_22_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_22_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_22_ffn_rmsnorm_scaled)[name = string("block_22_ffn_rmsnorm_squared_sum")]; fp16 block_22_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_22_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_22_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_22_ffn_rmsnorm_rsqrt_epsilon_0, x = block_22_ffn_rmsnorm_squared_sum)[name = string("block_22_ffn_rmsnorm_rsqrt")]; fp16 block_22_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_22_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_22_ffn_rmsnorm_dim_scaled = mul(x = block_22_ffn_rmsnorm_scaled, y = block_22_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_22_ffn_rmsnorm_dim_scaled")]; tensor block_22_ffn_rmsnorm_normalized = mul(x = block_22_ffn_rmsnorm_dim_scaled, y = block_22_ffn_rmsnorm_rsqrt)[name = string("block_22_ffn_rmsnorm_normalized")]; tensor block_22_ffn_rmsnorm_y_0 = const()[name = string("block_22_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548220096)))]; tensor block_22_ffn_rmsnorm = mul(x = block_22_ffn_rmsnorm_normalized, y = block_22_ffn_rmsnorm_y_0)[name = string("block_22_ffn_rmsnorm")]; tensor block_22_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548221952))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551490624))))[name = string("block_22_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_107 = constexpr_blockwise_shift_scale(data = block_22_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551646336))))[name = string("constexpr_blockwise_shift_scale_107")]; tensor block_22_ffn_inproj_strides_0 = const()[name = string("block_22_ffn_inproj_strides_0"), val = tensor([1])]; string block_22_ffn_inproj_pad_type_0 = const()[name = string("block_22_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_22_ffn_inproj_pad_0 = const()[name = string("block_22_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_22_ffn_inproj_dilations_0 = const()[name = string("block_22_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_22_ffn_inproj_groups_0 = const()[name = string("block_22_ffn_inproj_groups_0"), val = int32(1)]; tensor block_22_ffn_inproj = conv(dilations = block_22_ffn_inproj_dilations_0, groups = block_22_ffn_inproj_groups_0, pad = block_22_ffn_inproj_pad_0, pad_type = block_22_ffn_inproj_pad_type_0, strides = block_22_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_107, x = block_22_ffn_rmsnorm)[name = string("block_22_ffn_inproj")]; tensor 
block_22_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551656128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554924800))))[name = string("block_22_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_108 = constexpr_blockwise_shift_scale(data = block_22_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555080512))))[name = string("constexpr_blockwise_shift_scale_108")]; tensor block_22_ffn_g_strides_0 = const()[name = string("block_22_ffn_g_strides_0"), val = tensor([1])]; string block_22_ffn_g_pad_type_0 = const()[name = string("block_22_ffn_g_pad_type_0"), val = string("valid")]; tensor block_22_ffn_g_pad_0 = const()[name = string("block_22_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_22_ffn_g_dilations_0 = const()[name = string("block_22_ffn_g_dilations_0"), val = tensor([1])]; int32 block_22_ffn_g_groups_0 = const()[name = string("block_22_ffn_g_groups_0"), val = int32(1)]; tensor block_22_ffn_g = conv(dilations = block_22_ffn_g_dilations_0, groups = block_22_ffn_g_groups_0, pad = block_22_ffn_g_pad_0, pad_type = block_22_ffn_g_pad_type_0, strides = block_22_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_108, x = block_22_ffn_rmsnorm)[name = string("block_22_ffn_g")]; tensor block_22_ffn_g_activation = silu(x = block_22_ffn_g)[name = string("block_22_ffn_g_activation")]; tensor block_22_ffn_x_gated = mul(x = block_22_ffn_inproj, y = block_22_ffn_g_activation)[name = string("block_22_ffn_x_gated")]; tensor block_22_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555090304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558358976))))[name = string("block_22_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_109 = constexpr_blockwise_shift_scale(data = block_22_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558387712))))[name = string("constexpr_blockwise_shift_scale_109")]; tensor block_22_ffn_outproj_strides_0 = const()[name = string("block_22_ffn_outproj_strides_0"), val = tensor([1])]; string block_22_ffn_outproj_pad_type_0 = const()[name = string("block_22_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_22_ffn_outproj_pad_0 = const()[name = string("block_22_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_22_ffn_outproj_dilations_0 = const()[name = string("block_22_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_22_ffn_outproj_groups_0 = const()[name = string("block_22_ffn_outproj_groups_0"), val = int32(1)]; tensor block_22_ffn_outproj = conv(dilations = block_22_ffn_outproj_dilations_0, groups = block_22_ffn_outproj_groups_0, pad = block_22_ffn_outproj_pad_0, pad_type = block_22_ffn_outproj_pad_type_0, strides = block_22_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_109, x = block_22_ffn_x_gated)[name = string("block_22_ffn_outproj")]; tensor block_22_residual_2 = add(x = block_22_ffn_outproj, y = block_22_residual_1)[name = string("block_22_residual_2")]; tensor block_23_attention_rmsnorm_abs = abs(x = block_22_residual_2)[name = string("block_23_attention_rmsnorm_abs")]; tensor block_23_attention_rmsnorm_maxval_axes_0 = const()[name = 
string("block_23_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_23_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_23_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_23_attention_rmsnorm_maxval = reduce_max(axes = block_23_attention_rmsnorm_maxval_axes_0, keep_dims = block_23_attention_rmsnorm_maxval_keep_dims_0, x = block_23_attention_rmsnorm_abs)[name = string("block_23_attention_rmsnorm_maxval")]; fp16 block_23_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_23_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_23_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_23_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_23_attention_rmsnorm_maxval_clipped = clip(alpha = block_23_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_23_attention_rmsnorm_maxval_clipped_beta_0, x = block_23_attention_rmsnorm_maxval)[name = string("block_23_attention_rmsnorm_maxval_clipped")]; tensor block_23_attention_rmsnorm_scaled = real_div(x = block_22_residual_2, y = block_23_attention_rmsnorm_maxval_clipped)[name = string("block_23_attention_rmsnorm_scaled")]; tensor block_23_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_23_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_23_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_23_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_23_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_23_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_23_attention_rmsnorm_squared_sum_keep_dims_0, x = block_23_attention_rmsnorm_scaled)[name = string("block_23_attention_rmsnorm_squared_sum")]; fp16 block_23_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_23_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_23_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_23_attention_rmsnorm_rsqrt_epsilon_0, x = block_23_attention_rmsnorm_squared_sum)[name = string("block_23_attention_rmsnorm_rsqrt")]; fp16 block_23_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_23_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_23_attention_rmsnorm_dim_scaled = mul(x = block_23_attention_rmsnorm_scaled, y = block_23_attention_rmsnorm_dim_scaled_y_0)[name = string("block_23_attention_rmsnorm_dim_scaled")]; tensor block_23_attention_rmsnorm_normalized = mul(x = block_23_attention_rmsnorm_dim_scaled, y = block_23_attention_rmsnorm_rsqrt)[name = string("block_23_attention_rmsnorm_normalized")]; tensor block_23_attention_rmsnorm_y_0 = const()[name = string("block_23_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558389568)))]; tensor block_23_attention_rmsnorm = mul(x = block_23_attention_rmsnorm_normalized, y = block_23_attention_rmsnorm_y_0)[name = string("block_23_attention_rmsnorm")]; tensor attention_23_qkvproj_weight_0 = const()[name = string("attention_23_qkvproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558391424)))]; tensor attention_23_qkvproj_bias_0 = const()[name = string("attention_23_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560455872)))]; tensor attention_23_qkvproj_strides_0 = const()[name = string("attention_23_qkvproj_strides_0"), val = tensor([1])]; string 
attention_23_qkvproj_pad_type_0 = const()[name = string("attention_23_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_23_qkvproj_pad_0 = const()[name = string("attention_23_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_23_qkvproj_dilations_0 = const()[name = string("attention_23_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_23_qkvproj_groups_0 = const()[name = string("attention_23_qkvproj_groups_0"), val = int32(1)]; tensor attention_23_qkvproj = conv(bias = attention_23_qkvproj_bias_0, dilations = attention_23_qkvproj_dilations_0, groups = attention_23_qkvproj_groups_0, pad = attention_23_qkvproj_pad_0, pad_type = attention_23_qkvproj_pad_type_0, strides = attention_23_qkvproj_strides_0, weight = attention_23_qkvproj_weight_0, x = block_23_attention_rmsnorm)[name = string("attention_23_qkvproj")]; tensor attention_23_head_reshape_shape_0 = const()[name = string("attention_23_head_reshape_shape_0"), val = tensor([1, 18, 64, 1])]; tensor attention_23_head_reshape = reshape(shape = attention_23_head_reshape_shape_0, x = attention_23_qkvproj)[name = string("attention_23_head_reshape")]; tensor attention_23_head_transpose_perm_0 = const()[name = string("attention_23_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_23_split_qkv_heads_axis_0 = const()[name = string("attention_23_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_23_split_qkv_heads_split_sizes_0 = const()[name = string("attention_23_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_23_head_transpose = transpose(perm = attention_23_head_transpose_perm_0, x = attention_23_head_reshape)[name = string("transpose_2")]; tensor attention_23_split_qkv_heads_0, tensor attention_23_split_qkv_heads_1, tensor attention_23_split_qkv_heads_2 = split(axis = attention_23_split_qkv_heads_axis_0, split_sizes = attention_23_split_qkv_heads_split_sizes_0, x = attention_23_head_transpose)[name = string("attention_23_split_qkv_heads")]; tensor attention_23_q_rope_lhs_mult = mul(x = attention_23_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_23_q_rope_lhs_mult")]; int32 attention_23_q_rotate_half_split_num_splits_0 = const()[name = string("attention_23_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_23_q_rotate_half_split_axis_0 = const()[name = string("attention_23_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_23_q_rotate_half_split_0, tensor attention_23_q_rotate_half_split_1 = split(axis = attention_23_q_rotate_half_split_axis_0, num_splits = attention_23_q_rotate_half_split_num_splits_0, x = attention_23_split_qkv_heads_0)[name = string("attention_23_q_rotate_half_split")]; fp16 attention_23_q_rotate_half_neg_y_0 = const()[name = string("attention_23_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_23_q_rotate_half_neg = mul(x = attention_23_q_rotate_half_split_1, y = attention_23_q_rotate_half_neg_y_0)[name = string("attention_23_q_rotate_half_neg")]; int32 attention_23_q_rotate_half_concat_axis_0 = const()[name = string("attention_23_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_23_q_rotate_half_concat_interleave_0 = const()[name = string("attention_23_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_23_q_rotate_half_concat = concat(axis = attention_23_q_rotate_half_concat_axis_0, interleave = attention_23_q_rotate_half_concat_interleave_0, values = (attention_23_q_rotate_half_neg, attention_23_q_rotate_half_split_0))[name = 
string("attention_23_q_rotate_half_concat")]; tensor attention_23_q_rope_rhs_mult = mul(x = attention_23_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_23_q_rope_rhs_mult")]; tensor attention_23_q_rope = add(x = attention_23_q_rope_lhs_mult, y = attention_23_q_rope_rhs_mult)[name = string("attention_23_q_rope")]; tensor attention_23_k_rope_lhs_mult = mul(x = attention_23_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_23_k_rope_lhs_mult")]; int32 attention_23_k_rotate_half_split_num_splits_0 = const()[name = string("attention_23_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_23_k_rotate_half_split_axis_0 = const()[name = string("attention_23_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_23_k_rotate_half_split_0, tensor attention_23_k_rotate_half_split_1 = split(axis = attention_23_k_rotate_half_split_axis_0, num_splits = attention_23_k_rotate_half_split_num_splits_0, x = attention_23_split_qkv_heads_1)[name = string("attention_23_k_rotate_half_split")]; fp16 attention_23_k_rotate_half_neg_y_0 = const()[name = string("attention_23_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_23_k_rotate_half_neg = mul(x = attention_23_k_rotate_half_split_1, y = attention_23_k_rotate_half_neg_y_0)[name = string("attention_23_k_rotate_half_neg")]; int32 attention_23_k_rotate_half_concat_axis_0 = const()[name = string("attention_23_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_23_k_rotate_half_concat_interleave_0 = const()[name = string("attention_23_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_23_k_rotate_half_concat = concat(axis = attention_23_k_rotate_half_concat_axis_0, interleave = attention_23_k_rotate_half_concat_interleave_0, values = (attention_23_k_rotate_half_neg, attention_23_k_rotate_half_split_0))[name = string("attention_23_k_rotate_half_concat")]; tensor attention_23_k_rope_rhs_mult = mul(x = attention_23_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_23_k_rope_rhs_mult")]; tensor attention_23_k_rope = add(x = attention_23_k_rope_lhs_mult, y = attention_23_k_rope_rhs_mult)[name = string("attention_23_k_rope")]; int32 attention_23_q_splits_axis_0 = const()[name = string("attention_23_q_splits_axis_0"), val = int32(1)]; int32 attention_23_q_splits_num_splits_0 = const()[name = string("attention_23_q_splits_num_splits_0"), val = int32(2)]; tensor attention_23_q_splits_0, tensor attention_23_q_splits_1 = split(axis = attention_23_q_splits_axis_0, num_splits = attention_23_q_splits_num_splits_0, x = attention_23_q_rope)[name = string("attention_23_q_splits")]; tensor attention_23_update_begin_0_values0_0 = const()[name = string("attention_23_update_begin_0_values0_0"), val = tensor([23])]; tensor attention_23_update_begin_0_values1_0 = const()[name = string("attention_23_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_23_update_begin_0_values3_0 = const()[name = string("attention_23_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_23_update_begin_0_axis_0 = const()[name = string("attention_23_update_begin_0_axis_0"), val = int32(0)]; bool attention_23_update_begin_0_interleave_0 = const()[name = string("attention_23_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_23_update_begin_0 = concat(axis = attention_23_update_begin_0_axis_0, interleave = attention_23_update_begin_0_interleave_0, values = (attention_23_update_begin_0_values0_0, attention_23_update_begin_0_values1_0, query_pos1, 
attention_23_update_begin_0_values3_0))[name = string("attention_23_update_begin_0")]; tensor attention_23_update_end_0_values0_0 = const()[name = string("attention_23_update_end_0_values0_0"), val = tensor([24])]; tensor attention_23_update_end_0_values1_0 = const()[name = string("attention_23_update_end_0_values1_0"), val = tensor([2])]; tensor attention_23_update_end_0_values3_0 = const()[name = string("attention_23_update_end_0_values3_0"), val = tensor([64])]; int32 attention_23_update_end_0_axis_0 = const()[name = string("attention_23_update_end_0_axis_0"), val = int32(0)]; bool attention_23_update_end_0_interleave_0 = const()[name = string("attention_23_update_end_0_interleave_0"), val = bool(false)]; tensor attention_23_update_end_0 = concat(axis = attention_23_update_end_0_axis_0, interleave = attention_23_update_end_0_interleave_0, values = (attention_23_update_end_0_values0_0, attention_23_update_end_0_values1_0, end_pos_0, attention_23_update_end_0_values3_0))[name = string("attention_23_update_end_0")]; tensor attention_23_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_23_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_23_updated_key_cache_0 = slice_update(begin = attention_23_update_begin_0, end = attention_23_update_end_0, squeeze_mask = attention_23_updated_key_cache_0_squeeze_mask_0, update = attention_23_k_rope, x = coreml_update_state_44)[name = string("attention_23_updated_key_cache_0")]; write_state(data = attention_23_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_46_write_state")]; tensor coreml_update_state_46 = read_state(input = key_cache_state)[name = string("coreml_update_state_46")]; tensor attention_23_key_cache_begin_0 = const()[name = string("attention_23_key_cache_begin_0"), val = tensor([23, 0, 0, 0])]; tensor attention_23_key_cache_end_0 = const()[name = string("attention_23_key_cache_end_0"), val = tensor([24, 2, 512, 64])]; tensor attention_23_key_cache_squeeze_mask_0 = const()[name = string("attention_23_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_23_key_cache = slice_by_index(begin = attention_23_key_cache_begin_0, end = attention_23_key_cache_end_0, squeeze_mask = attention_23_key_cache_squeeze_mask_0, x = coreml_update_state_46)[name = string("attention_23_key_cache")]; int32 attention_23_key_cache_head_axis_0 = const()[name = string("attention_23_key_cache_head_axis_0"), val = int32(1)]; int32 attention_23_key_cache_head_num_splits_0 = const()[name = string("attention_23_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_23_key_cache_head_0, tensor attention_23_key_cache_head_1 = split(axis = attention_23_key_cache_head_axis_0, num_splits = attention_23_key_cache_head_num_splits_0, x = attention_23_key_cache)[name = string("attention_23_key_cache_head")]; tensor attention_23_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_23_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_23_updated_value_cache_0 = slice_update(begin = attention_23_update_begin_0, end = attention_23_update_end_0, squeeze_mask = attention_23_updated_value_cache_0_squeeze_mask_0, update = attention_23_split_qkv_heads_2, x = coreml_update_state_45)[name = string("attention_23_updated_value_cache_0")]; write_state(data = attention_23_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_47_write_state")]; tensor 
coreml_update_state_47 = read_state(input = value_cache_state)[name = string("coreml_update_state_47")]; tensor attention_23_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_23_slice_current_layer_value_cache_begin_0"), val = tensor([23, 0, 0, 0])]; tensor attention_23_slice_current_layer_value_cache_end_0 = const()[name = string("attention_23_slice_current_layer_value_cache_end_0"), val = tensor([24, 2, 512, 64])]; tensor attention_23_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_23_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_23_slice_current_layer_value_cache = slice_by_index(begin = attention_23_slice_current_layer_value_cache_begin_0, end = attention_23_slice_current_layer_value_cache_end_0, squeeze_mask = attention_23_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_47)[name = string("attention_23_slice_current_layer_value_cache")]; int32 attention_23_slice_value_cache_heads_axis_0 = const()[name = string("attention_23_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_23_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_23_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_23_slice_value_cache_heads_0, tensor attention_23_slice_value_cache_heads_1 = split(axis = attention_23_slice_value_cache_heads_axis_0, num_splits = attention_23_slice_value_cache_heads_num_splits_0, x = attention_23_slice_current_layer_value_cache)[name = string("attention_23_slice_value_cache_heads")]; bool attention_23_scores_0_transpose_y_0 = const()[name = string("attention_23_scores_0_transpose_y_0"), val = bool(true)]; bool attention_23_scores_0_transpose_x_0 = const()[name = string("attention_23_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_23_scores_0 = matmul(transpose_x = attention_23_scores_0_transpose_x_0, transpose_y = attention_23_scores_0_transpose_y_0, x = attention_23_key_cache_head_0, y = attention_23_q_splits_0)[name = string("attention_23_scores_0")]; fp16 attention_23_scaled_scores_0_y_0 = const()[name = string("attention_23_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_23_scaled_scores_0 = mul(x = attention_23_scores_0, y = attention_23_scaled_scores_0_y_0)[name = string("attention_23_scaled_scores_0")]; tensor attention_23_masked_scaled_scores_0 = add(x = attention_23_scaled_scores_0, y = transpose_0)[name = string("attention_23_masked_scaled_scores_0")]; int32 softmax_46_axis_0 = const()[name = string("softmax_46_axis_0"), val = int32(-2)]; tensor softmax_46 = softmax(axis = softmax_46_axis_0, x = attention_23_masked_scaled_scores_0)[name = string("softmax_46")]; bool attention_23_attention_0_transpose_x_0 = const()[name = string("attention_23_attention_0_transpose_x_0"), val = bool(true)]; bool attention_23_attention_0_transpose_y_0 = const()[name = string("attention_23_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_23_attention_0 = matmul(transpose_x = attention_23_attention_0_transpose_x_0, transpose_y = attention_23_attention_0_transpose_y_0, x = softmax_46, y = attention_23_slice_value_cache_heads_0)[name = string("attention_23_attention_0")]; bool attention_23_scores_1_transpose_y_0 = const()[name = string("attention_23_scores_1_transpose_y_0"), val = bool(true)]; bool attention_23_scores_1_transpose_x_0 = const()[name = string("attention_23_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_23_scores_1 = 
matmul(transpose_x = attention_23_scores_1_transpose_x_0, transpose_y = attention_23_scores_1_transpose_y_0, x = attention_23_key_cache_head_1, y = attention_23_q_splits_1)[name = string("attention_23_scores_1")]; fp16 attention_23_scaled_scores_1_y_0 = const()[name = string("attention_23_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_23_scaled_scores_1 = mul(x = attention_23_scores_1, y = attention_23_scaled_scores_1_y_0)[name = string("attention_23_scaled_scores_1")]; tensor attention_23_masked_scaled_scores_1 = add(x = attention_23_scaled_scores_1, y = transpose_0)[name = string("attention_23_masked_scaled_scores_1")]; int32 softmax_47_axis_0 = const()[name = string("softmax_47_axis_0"), val = int32(-2)]; tensor softmax_47 = softmax(axis = softmax_47_axis_0, x = attention_23_masked_scaled_scores_1)[name = string("softmax_47")]; bool attention_23_attention_1_transpose_x_0 = const()[name = string("attention_23_attention_1_transpose_x_0"), val = bool(true)]; bool attention_23_attention_1_transpose_y_0 = const()[name = string("attention_23_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_23_attention_1 = matmul(transpose_x = attention_23_attention_1_transpose_x_0, transpose_y = attention_23_attention_1_transpose_y_0, x = softmax_47, y = attention_23_slice_value_cache_heads_1)[name = string("attention_23_attention_1")]; int32 attention_23_concat_attention_all_heads_axis_0 = const()[name = string("attention_23_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_23_concat_attention_all_heads_interleave_0 = const()[name = string("attention_23_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_23_concat_attention_all_heads = concat(axis = attention_23_concat_attention_all_heads_axis_0, interleave = attention_23_concat_attention_all_heads_interleave_0, values = (attention_23_attention_0, attention_23_attention_1))[name = string("attention_23_concat_attention_all_heads")]; tensor attention_23_channels_first_retransposed_perm_0 = const()[name = string("attention_23_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_23_reshaped_shape_0 = const()[name = string("attention_23_reshaped_shape_0"), val = tensor([1, 896, 1])]; tensor attention_23_channels_first_retransposed = transpose(perm = attention_23_channels_first_retransposed_perm_0, x = attention_23_concat_attention_all_heads)[name = string("transpose_1")]; tensor attention_23_reshaped = reshape(shape = attention_23_reshaped_shape_0, x = attention_23_channels_first_retransposed)[name = string("attention_23_reshaped")]; tensor attention_23_outproj_weight_0 = const()[name = string("attention_23_outproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560458240)))]; tensor attention_23_outproj_strides_0 = const()[name = string("attention_23_outproj_strides_0"), val = tensor([1])]; string attention_23_outproj_pad_type_0 = const()[name = string("attention_23_outproj_pad_type_0"), val = string("valid")]; tensor attention_23_outproj_pad_0 = const()[name = string("attention_23_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_23_outproj_dilations_0 = const()[name = string("attention_23_outproj_dilations_0"), val = tensor([1])]; int32 attention_23_outproj_groups_0 = const()[name = string("attention_23_outproj_groups_0"), val = int32(1)]; tensor attention_23_outproj = conv(dilations = attention_23_outproj_dilations_0, groups = attention_23_outproj_groups_0, pad = attention_23_outproj_pad_0, 
pad_type = attention_23_outproj_pad_type_0, strides = attention_23_outproj_strides_0, weight = attention_23_outproj_weight_0, x = attention_23_reshaped)[name = string("attention_23_outproj")]; tensor block_23_residual_1 = add(x = block_22_residual_2, y = attention_23_outproj)[name = string("block_23_residual_1")]; tensor block_23_ffn_rmsnorm_abs = abs(x = block_23_residual_1)[name = string("block_23_ffn_rmsnorm_abs")]; tensor block_23_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_23_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_23_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_23_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_23_ffn_rmsnorm_maxval = reduce_max(axes = block_23_ffn_rmsnorm_maxval_axes_0, keep_dims = block_23_ffn_rmsnorm_maxval_keep_dims_0, x = block_23_ffn_rmsnorm_abs)[name = string("block_23_ffn_rmsnorm_maxval")]; fp16 block_23_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_23_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_23_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_23_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_23_ffn_rmsnorm_maxval_clipped = clip(alpha = block_23_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_23_ffn_rmsnorm_maxval_clipped_beta_0, x = block_23_ffn_rmsnorm_maxval)[name = string("block_23_ffn_rmsnorm_maxval_clipped")]; tensor block_23_ffn_rmsnorm_scaled = real_div(x = block_23_residual_1, y = block_23_ffn_rmsnorm_maxval_clipped)[name = string("block_23_ffn_rmsnorm_scaled")]; tensor block_23_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_23_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_23_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_23_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_23_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_23_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_23_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_23_ffn_rmsnorm_scaled)[name = string("block_23_ffn_rmsnorm_squared_sum")]; fp16 block_23_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_23_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_23_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_23_ffn_rmsnorm_rsqrt_epsilon_0, x = block_23_ffn_rmsnorm_squared_sum)[name = string("block_23_ffn_rmsnorm_rsqrt")]; fp16 block_23_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_23_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_23_ffn_rmsnorm_dim_scaled = mul(x = block_23_ffn_rmsnorm_scaled, y = block_23_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_23_ffn_rmsnorm_dim_scaled")]; tensor block_23_ffn_rmsnorm_normalized = mul(x = block_23_ffn_rmsnorm_dim_scaled, y = block_23_ffn_rmsnorm_rsqrt)[name = string("block_23_ffn_rmsnorm_normalized")]; tensor block_23_ffn_rmsnorm_y_0 = const()[name = string("block_23_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562063936)))]; tensor block_23_ffn_rmsnorm = mul(x = block_23_ffn_rmsnorm_normalized, y = block_23_ffn_rmsnorm_y_0)[name = string("block_23_ffn_rmsnorm")]; tensor block_23_ffn_inproj_weight_0 = const()[name = string("block_23_ffn_inproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562065792)))]; tensor block_23_ffn_inproj_strides_0 = const()[name = string("block_23_ffn_inproj_strides_0"), val = tensor([1])]; string block_23_ffn_inproj_pad_type_0 = 
const()[name = string("block_23_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_23_ffn_inproj_pad_0 = const()[name = string("block_23_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_23_ffn_inproj_dilations_0 = const()[name = string("block_23_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_23_ffn_inproj_groups_0 = const()[name = string("block_23_ffn_inproj_groups_0"), val = int32(1)]; tensor block_23_ffn_inproj = conv(dilations = block_23_ffn_inproj_dilations_0, groups = block_23_ffn_inproj_groups_0, pad = block_23_ffn_inproj_pad_0, pad_type = block_23_ffn_inproj_pad_type_0, strides = block_23_ffn_inproj_strides_0, weight = block_23_ffn_inproj_weight_0, x = block_23_ffn_rmsnorm)[name = string("block_23_ffn_inproj")]; tensor block_23_ffn_g_weight_0 = const()[name = string("block_23_ffn_g_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(570782144)))]; tensor block_23_ffn_g_strides_0 = const()[name = string("block_23_ffn_g_strides_0"), val = tensor([1])]; string block_23_ffn_g_pad_type_0 = const()[name = string("block_23_ffn_g_pad_type_0"), val = string("valid")]; tensor block_23_ffn_g_pad_0 = const()[name = string("block_23_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_23_ffn_g_dilations_0 = const()[name = string("block_23_ffn_g_dilations_0"), val = tensor([1])]; int32 block_23_ffn_g_groups_0 = const()[name = string("block_23_ffn_g_groups_0"), val = int32(1)]; tensor block_23_ffn_g = conv(dilations = block_23_ffn_g_dilations_0, groups = block_23_ffn_g_groups_0, pad = block_23_ffn_g_pad_0, pad_type = block_23_ffn_g_pad_type_0, strides = block_23_ffn_g_strides_0, weight = block_23_ffn_g_weight_0, x = block_23_ffn_rmsnorm)[name = string("block_23_ffn_g")]; tensor block_23_ffn_g_activation = silu(x = block_23_ffn_g)[name = string("block_23_ffn_g_activation")]; tensor block_23_ffn_x_gated = mul(x = block_23_ffn_inproj, y = block_23_ffn_g_activation)[name = string("block_23_ffn_x_gated")]; tensor block_23_ffn_outproj_weight_0 = const()[name = string("block_23_ffn_outproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(579498496)))]; tensor block_23_ffn_outproj_strides_0 = const()[name = string("block_23_ffn_outproj_strides_0"), val = tensor([1])]; string block_23_ffn_outproj_pad_type_0 = const()[name = string("block_23_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_23_ffn_outproj_pad_0 = const()[name = string("block_23_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_23_ffn_outproj_dilations_0 = const()[name = string("block_23_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_23_ffn_outproj_groups_0 = const()[name = string("block_23_ffn_outproj_groups_0"), val = int32(1)]; tensor block_23_ffn_outproj = conv(dilations = block_23_ffn_outproj_dilations_0, groups = block_23_ffn_outproj_groups_0, pad = block_23_ffn_outproj_pad_0, pad_type = block_23_ffn_outproj_pad_type_0, strides = block_23_ffn_outproj_strides_0, weight = block_23_ffn_outproj_weight_0, x = block_23_ffn_x_gated)[name = string("block_23_ffn_outproj")]; tensor block_23_residual_2 = add(x = block_23_ffn_outproj, y = block_23_residual_1)[name = string("block_23_residual_2")]; tensor final_norm_rmsnorm_abs = abs(x = block_23_residual_2)[name = string("final_norm_rmsnorm_abs")]; tensor final_norm_rmsnorm_maxval_axes_0 = const()[name = string("final_norm_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool final_norm_rmsnorm_maxval_keep_dims_0 = const()[name = 
string("final_norm_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor final_norm_rmsnorm_maxval = reduce_max(axes = final_norm_rmsnorm_maxval_axes_0, keep_dims = final_norm_rmsnorm_maxval_keep_dims_0, x = final_norm_rmsnorm_abs)[name = string("final_norm_rmsnorm_maxval")]; fp16 final_norm_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("final_norm_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 final_norm_rmsnorm_maxval_clipped_beta_0 = const()[name = string("final_norm_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor final_norm_rmsnorm_maxval_clipped = clip(alpha = final_norm_rmsnorm_maxval_clipped_alpha_0, beta = final_norm_rmsnorm_maxval_clipped_beta_0, x = final_norm_rmsnorm_maxval)[name = string("final_norm_rmsnorm_maxval_clipped")]; tensor final_norm_rmsnorm_scaled = real_div(x = block_23_residual_2, y = final_norm_rmsnorm_maxval_clipped)[name = string("final_norm_rmsnorm_scaled")]; tensor final_norm_rmsnorm_squared_sum_axes_0 = const()[name = string("final_norm_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool final_norm_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("final_norm_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor final_norm_rmsnorm_squared_sum = reduce_sum_square(axes = final_norm_rmsnorm_squared_sum_axes_0, keep_dims = final_norm_rmsnorm_squared_sum_keep_dims_0, x = final_norm_rmsnorm_scaled)[name = string("final_norm_rmsnorm_squared_sum")]; fp16 final_norm_rmsnorm_rsqrt_epsilon_0 = const()[name = string("final_norm_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor final_norm_rmsnorm_rsqrt = rsqrt(epsilon = final_norm_rmsnorm_rsqrt_epsilon_0, x = final_norm_rmsnorm_squared_sum)[name = string("final_norm_rmsnorm_rsqrt")]; fp16 final_norm_rmsnorm_dim_scaled_y_0 = const()[name = string("final_norm_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor final_norm_rmsnorm_dim_scaled = mul(x = final_norm_rmsnorm_scaled, y = final_norm_rmsnorm_dim_scaled_y_0)[name = string("final_norm_rmsnorm_dim_scaled")]; tensor final_norm_rmsnorm_normalized = mul(x = final_norm_rmsnorm_dim_scaled, y = final_norm_rmsnorm_rsqrt)[name = string("final_norm_rmsnorm_normalized")]; tensor final_norm_rmsnorm_y_0 = const()[name = string("final_norm_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588214848)))]; tensor final_norm_rmsnorm = mul(x = final_norm_rmsnorm_normalized, y = final_norm_rmsnorm_y_0)[name = string("final_norm_rmsnorm")]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588216704)))]; tensor logits_0_strides_0 = const()[name = string("logits_0_strides_0"), val = tensor([1])]; string logits_0_pad_type_0 = const()[name = string("logits_0_pad_type_0"), val = string("valid")]; tensor logits_0_pad_0 = const()[name = string("logits_0_pad_0"), val = tensor([0, 0])]; tensor logits_0_dilations_0 = const()[name = string("logits_0_dilations_0"), val = tensor([1])]; int32 logits_0_groups_0 = const()[name = string("logits_0_groups_0"), val = int32(1)]; tensor logits_0 = conv(dilations = logits_0_dilations_0, groups = logits_0_groups_0, pad = logits_0_pad_0, pad_type = logits_0_pad_type_0, strides = logits_0_strides_0, weight = expand_dims_1, x = final_norm_rmsnorm)[name = string("logits_0")]; tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617576896)))]; tensor 
logits_1_strides_0 = const()[name = string("logits_1_strides_0"), val = tensor([1])]; string logits_1_pad_type_0 = const()[name = string("logits_1_pad_type_0"), val = string("valid")]; tensor logits_1_pad_0 = const()[name = string("logits_1_pad_0"), val = tensor([0, 0])]; tensor logits_1_dilations_0 = const()[name = string("logits_1_dilations_0"), val = tensor([1])]; int32 logits_1_groups_0 = const()[name = string("logits_1_groups_0"), val = int32(1)]; tensor logits_1 = conv(dilations = logits_1_dilations_0, groups = logits_1_groups_0, pad = logits_1_pad_0, pad_type = logits_1_pad_type_0, strides = logits_1_strides_0, weight = expand_dims_2, x = final_norm_rmsnorm)[name = string("logits_1")]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646937088)))]; tensor logits_2_strides_0 = const()[name = string("logits_2_strides_0"), val = tensor([1])]; string logits_2_pad_type_0 = const()[name = string("logits_2_pad_type_0"), val = string("valid")]; tensor logits_2_pad_0 = const()[name = string("logits_2_pad_0"), val = tensor([0, 0])]; tensor logits_2_dilations_0 = const()[name = string("logits_2_dilations_0"), val = tensor([1])]; int32 logits_2_groups_0 = const()[name = string("logits_2_groups_0"), val = int32(1)]; tensor logits_2 = conv(dilations = logits_2_dilations_0, groups = logits_2_groups_0, pad = logits_2_pad_0, pad_type = logits_2_pad_type_0, strides = logits_2_strides_0, weight = expand_dims_3, x = final_norm_rmsnorm)[name = string("logits_2")]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676297280)))]; tensor logits_3_strides_0 = const()[name = string("logits_3_strides_0"), val = tensor([1])]; string logits_3_pad_type_0 = const()[name = string("logits_3_pad_type_0"), val = string("valid")]; tensor logits_3_pad_0 = const()[name = string("logits_3_pad_0"), val = tensor([0, 0])]; tensor logits_3_dilations_0 = const()[name = string("logits_3_dilations_0"), val = tensor([1])]; int32 logits_3_groups_0 = const()[name = string("logits_3_groups_0"), val = int32(1)]; tensor logits_3 = conv(dilations = logits_3_dilations_0, groups = logits_3_groups_0, pad = logits_3_pad_0, pad_type = logits_3_pad_type_0, strides = logits_3_strides_0, weight = expand_dims_4, x = final_norm_rmsnorm)[name = string("logits_3")]; tensor expand_dims_5 = const()[name = string("expand_dims_5"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(705657472)))]; tensor logits_4_strides_0 = const()[name = string("logits_4_strides_0"), val = tensor([1])]; string logits_4_pad_type_0 = const()[name = string("logits_4_pad_type_0"), val = string("valid")]; tensor logits_4_pad_0 = const()[name = string("logits_4_pad_0"), val = tensor([0, 0])]; tensor logits_4_dilations_0 = const()[name = string("logits_4_dilations_0"), val = tensor([1])]; int32 logits_4_groups_0 = const()[name = string("logits_4_groups_0"), val = int32(1)]; tensor logits_4 = conv(dilations = logits_4_dilations_0, groups = logits_4_groups_0, pad = logits_4_pad_0, pad_type = logits_4_pad_type_0, strides = logits_4_strides_0, weight = expand_dims_5, x = final_norm_rmsnorm)[name = string("logits_4")]; tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735017664)))]; tensor logits_5_strides_0 = const()[name = 
string("logits_5_strides_0"), val = tensor([1])]; string logits_5_pad_type_0 = const()[name = string("logits_5_pad_type_0"), val = string("valid")]; tensor logits_5_pad_0 = const()[name = string("logits_5_pad_0"), val = tensor([0, 0])]; tensor logits_5_dilations_0 = const()[name = string("logits_5_dilations_0"), val = tensor([1])]; int32 logits_5_groups_0 = const()[name = string("logits_5_groups_0"), val = int32(1)]; tensor logits_5 = conv(dilations = logits_5_dilations_0, groups = logits_5_groups_0, pad = logits_5_pad_0, pad_type = logits_5_pad_type_0, strides = logits_5_strides_0, weight = expand_dims_6, x = final_norm_rmsnorm)[name = string("logits_5")]; tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764377856)))]; tensor logits_6_strides_0 = const()[name = string("logits_6_strides_0"), val = tensor([1])]; string logits_6_pad_type_0 = const()[name = string("logits_6_pad_type_0"), val = string("valid")]; tensor logits_6_pad_0 = const()[name = string("logits_6_pad_0"), val = tensor([0, 0])]; tensor logits_6_dilations_0 = const()[name = string("logits_6_dilations_0"), val = tensor([1])]; int32 logits_6_groups_0 = const()[name = string("logits_6_groups_0"), val = int32(1)]; tensor logits_6 = conv(dilations = logits_6_dilations_0, groups = logits_6_groups_0, pad = logits_6_pad_0, pad_type = logits_6_pad_type_0, strides = logits_6_strides_0, weight = expand_dims_7, x = final_norm_rmsnorm)[name = string("logits_6")]; tensor expand_dims_8 = const()[name = string("expand_dims_8"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793738048)))]; tensor logits_7_strides_0 = const()[name = string("logits_7_strides_0"), val = tensor([1])]; string logits_7_pad_type_0 = const()[name = string("logits_7_pad_type_0"), val = string("valid")]; tensor logits_7_pad_0 = const()[name = string("logits_7_pad_0"), val = tensor([0, 0])]; tensor logits_7_dilations_0 = const()[name = string("logits_7_dilations_0"), val = tensor([1])]; int32 logits_7_groups_0 = const()[name = string("logits_7_groups_0"), val = int32(1)]; tensor logits_7 = conv(dilations = logits_7_dilations_0, groups = logits_7_groups_0, pad = logits_7_pad_0, pad_type = logits_7_pad_type_0, strides = logits_7_strides_0, weight = expand_dims_8, x = final_norm_rmsnorm)[name = string("logits_7")]; tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(823098240)))]; tensor logits_8_strides_0 = const()[name = string("logits_8_strides_0"), val = tensor([1])]; string logits_8_pad_type_0 = const()[name = string("logits_8_pad_type_0"), val = string("valid")]; tensor logits_8_pad_0 = const()[name = string("logits_8_pad_0"), val = tensor([0, 0])]; tensor logits_8_dilations_0 = const()[name = string("logits_8_dilations_0"), val = tensor([1])]; int32 logits_8_groups_0 = const()[name = string("logits_8_groups_0"), val = int32(1)]; tensor logits_8 = conv(dilations = logits_8_dilations_0, groups = logits_8_groups_0, pad = logits_8_pad_0, pad_type = logits_8_pad_type_0, strides = logits_8_strides_0, weight = expand_dims_9, x = final_norm_rmsnorm)[name = string("logits_8")]; tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852458432)))]; tensor logits_9_strides_0 = const()[name = string("logits_9_strides_0"), val = tensor([1])]; string 
logits_9_pad_type_0 = const()[name = string("logits_9_pad_type_0"), val = string("valid")]; tensor logits_9_pad_0 = const()[name = string("logits_9_pad_0"), val = tensor([0, 0])]; tensor logits_9_dilations_0 = const()[name = string("logits_9_dilations_0"), val = tensor([1])]; int32 logits_9_groups_0 = const()[name = string("logits_9_groups_0"), val = int32(1)]; tensor logits_9 = conv(dilations = logits_9_dilations_0, groups = logits_9_groups_0, pad = logits_9_pad_0, pad_type = logits_9_pad_type_0, strides = logits_9_strides_0, weight = expand_dims_10, x = final_norm_rmsnorm)[name = string("logits_9")]; int32 _logits_axis_0 = const()[name = string("_logits_axis_0"), val = int32(1)]; bool _logits_interleave_0 = const()[name = string("_logits_interleave_0"), val = bool(false)]; tensor _logits = concat(axis = _logits_axis_0, interleave = _logits_interleave_0, values = (logits_0, logits_1, logits_2, logits_3, logits_4, logits_5, logits_6, logits_7, logits_8, logits_9))[name = string("_logits")]; string logits_dtype_0 = const()[name = string("logits_dtype_0"), val = string("fp32")]; tensor logits = cast(dtype = logits_dtype_0, x = _logits)[name = string("cast_0")]; } -> (logits); func length_64(tensor input_ids, state> key_cache_state, tensor query_pos1, state> value_cache_state) { tensor expand_dims_0_axes_0 = const()[name = string("expand_dims_0_axes_0"), val = tensor([-1, -2])]; tensor expand_dims_0 = expand_dims(axes = expand_dims_0_axes_0, x = query_pos1)[name = string("expand_dims_0")]; tensor add_0_x_0 = const()[name = string("add_0_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(860486656)))]; tensor add_0 = add(x = add_0_x_0, y = expand_dims_0)[name = string("add_0")]; tensor mask_gather_x_0 = const()[name = string("mask_gather_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; int32 mask_gather_axis_0 = const()[name = string("mask_gather_axis_0"), val = int32(0)]; int32 mask_gather_batch_dims_0 = const()[name = string("mask_gather_batch_dims_0"), val = int32(0)]; bool mask_gather_validate_indices_0 = const()[name = string("mask_gather_validate_indices_0"), val = bool(false)]; tensor mask_gather = gather(axis = mask_gather_axis_0, batch_dims = mask_gather_batch_dims_0, indices = add_0, validate_indices = mask_gather_validate_indices_0, x = mask_gather_x_0)[name = string("mask_gather")]; tensor transpose_0_perm_0 = const()[name = string("transpose_0_perm_0"), val = tensor([0, 1, 3, 2])]; tensor query_sin_emb_x_0 = const()[name = string("query_sin_emb_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524416)))]; int32 query_sin_emb_axis_0 = const()[name = string("query_sin_emb_axis_0"), val = int32(0)]; int32 query_sin_emb_batch_dims_0 = const()[name = string("query_sin_emb_batch_dims_0"), val = int32(0)]; bool query_sin_emb_validate_indices_0 = const()[name = string("query_sin_emb_validate_indices_0"), val = bool(false)]; tensor query_sin_emb = gather(axis = query_sin_emb_axis_0, batch_dims = query_sin_emb_batch_dims_0, indices = add_0, validate_indices = query_sin_emb_validate_indices_0, x = query_sin_emb_x_0)[name = string("query_sin_emb")]; tensor query_cos_emb_x_0 = const()[name = string("query_cos_emb_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(590016)))]; int32 query_cos_emb_axis_0 = const()[name = string("query_cos_emb_axis_0"), val = int32(0)]; int32 query_cos_emb_batch_dims_0 = const()[name 
= string("query_cos_emb_batch_dims_0"), val = int32(0)]; bool query_cos_emb_validate_indices_0 = const()[name = string("query_cos_emb_validate_indices_0"), val = bool(false)]; tensor query_cos_emb = gather(axis = query_cos_emb_axis_0, batch_dims = query_cos_emb_batch_dims_0, indices = add_0, validate_indices = query_cos_emb_validate_indices_0, x = query_cos_emb_x_0)[name = string("query_cos_emb")]; tensor token_embedding_x_0 = const()[name = string("token_embedding_x_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(655616)))]; int32 token_embedding_axis_0 = const()[name = string("token_embedding_axis_0"), val = int32(0)]; int32 token_embedding_batch_dims_0 = const()[name = string("token_embedding_batch_dims_0"), val = int32(0)]; bool token_embedding_validate_indices_0 = const()[name = string("token_embedding_validate_indices_0"), val = bool(false)]; tensor token_embedding = gather(axis = token_embedding_axis_0, batch_dims = token_embedding_batch_dims_0, indices = input_ids, validate_indices = token_embedding_validate_indices_0, x = token_embedding_x_0)[name = string("token_embedding")]; tensor input_embeddings_channels_first_perm_0 = const()[name = string("input_embeddings_channels_first_perm_0"), val = tensor([0, 2, 1])]; int32 end_pos_0_x_0 = const()[name = string("end_pos_0_x_0"), val = int32(64)]; tensor end_pos_0 = add(x = end_pos_0_x_0, y = query_pos1)[name = string("end_pos_0")]; tensor read_state_0 = read_state(input = key_cache_state)[name = string("read_state_0")]; tensor read_state_1 = read_state(input = value_cache_state)[name = string("read_state_1")]; tensor input_embeddings_channels_first = transpose(perm = input_embeddings_channels_first_perm_0, x = token_embedding)[name = string("transpose_49")]; tensor block_0_attention_rmsnorm_abs = abs(x = input_embeddings_channels_first)[name = string("block_0_attention_rmsnorm_abs")]; tensor block_0_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_0_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_0_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_0_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_0_attention_rmsnorm_maxval = reduce_max(axes = block_0_attention_rmsnorm_maxval_axes_0, keep_dims = block_0_attention_rmsnorm_maxval_keep_dims_0, x = block_0_attention_rmsnorm_abs)[name = string("block_0_attention_rmsnorm_maxval")]; fp16 block_0_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_0_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_0_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_0_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_0_attention_rmsnorm_maxval_clipped = clip(alpha = block_0_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_0_attention_rmsnorm_maxval_clipped_beta_0, x = block_0_attention_rmsnorm_maxval)[name = string("block_0_attention_rmsnorm_maxval_clipped")]; tensor block_0_attention_rmsnorm_scaled = real_div(x = input_embeddings_channels_first, y = block_0_attention_rmsnorm_maxval_clipped)[name = string("block_0_attention_rmsnorm_scaled")]; tensor block_0_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_0_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_0_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_0_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_0_attention_rmsnorm_squared_sum = 
reduce_sum_square(axes = block_0_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_0_attention_rmsnorm_squared_sum_keep_dims_0, x = block_0_attention_rmsnorm_scaled)[name = string("block_0_attention_rmsnorm_squared_sum")]; fp16 block_0_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_0_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_0_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_0_attention_rmsnorm_rsqrt_epsilon_0, x = block_0_attention_rmsnorm_squared_sum)[name = string("block_0_attention_rmsnorm_rsqrt")]; fp16 block_0_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_0_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_0_attention_rmsnorm_dim_scaled = mul(x = block_0_attention_rmsnorm_scaled, y = block_0_attention_rmsnorm_dim_scaled_y_0)[name = string("block_0_attention_rmsnorm_dim_scaled")]; tensor block_0_attention_rmsnorm_normalized = mul(x = block_0_attention_rmsnorm_dim_scaled, y = block_0_attention_rmsnorm_rsqrt)[name = string("block_0_attention_rmsnorm_normalized")]; tensor block_0_attention_rmsnorm_y_0 = const()[name = string("block_0_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272924992)))]; tensor block_0_attention_rmsnorm = mul(x = block_0_attention_rmsnorm_normalized, y = block_0_attention_rmsnorm_y_0)[name = string("block_0_attention_rmsnorm")]; tensor attention_0_qkvproj_weight_0 = const()[name = string("attention_0_qkvproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272926848)))]; tensor attention_0_qkvproj_bias_0 = const()[name = string("attention_0_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274991296)))]; tensor attention_0_qkvproj_strides_0 = const()[name = string("attention_0_qkvproj_strides_0"), val = tensor([1])]; string attention_0_qkvproj_pad_type_0 = const()[name = string("attention_0_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_0_qkvproj_pad_0 = const()[name = string("attention_0_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_0_qkvproj_dilations_0 = const()[name = string("attention_0_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_0_qkvproj_groups_0 = const()[name = string("attention_0_qkvproj_groups_0"), val = int32(1)]; tensor attention_0_qkvproj = conv(bias = attention_0_qkvproj_bias_0, dilations = attention_0_qkvproj_dilations_0, groups = attention_0_qkvproj_groups_0, pad = attention_0_qkvproj_pad_0, pad_type = attention_0_qkvproj_pad_type_0, strides = attention_0_qkvproj_strides_0, weight = attention_0_qkvproj_weight_0, x = block_0_attention_rmsnorm)[name = string("attention_0_qkvproj")]; tensor attention_0_head_reshape_shape_0 = const()[name = string("attention_0_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_0_head_reshape = reshape(shape = attention_0_head_reshape_shape_0, x = attention_0_qkvproj)[name = string("attention_0_head_reshape")]; tensor attention_0_head_transpose_perm_0 = const()[name = string("attention_0_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_0_split_qkv_heads_axis_0 = const()[name = string("attention_0_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_0_split_qkv_heads_split_sizes_0 = const()[name = string("attention_0_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_0_head_transpose = transpose(perm = attention_0_head_transpose_perm_0, x = 
attention_0_head_reshape)[name = string("transpose_48")]; tensor attention_0_split_qkv_heads_0, tensor attention_0_split_qkv_heads_1, tensor attention_0_split_qkv_heads_2 = split(axis = attention_0_split_qkv_heads_axis_0, split_sizes = attention_0_split_qkv_heads_split_sizes_0, x = attention_0_head_transpose)[name = string("attention_0_split_qkv_heads")]; tensor attention_0_q_rope_lhs_mult = mul(x = attention_0_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_0_q_rope_lhs_mult")]; int32 attention_0_q_rotate_half_split_num_splits_0 = const()[name = string("attention_0_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_0_q_rotate_half_split_axis_0 = const()[name = string("attention_0_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_0_q_rotate_half_split_0, tensor attention_0_q_rotate_half_split_1 = split(axis = attention_0_q_rotate_half_split_axis_0, num_splits = attention_0_q_rotate_half_split_num_splits_0, x = attention_0_split_qkv_heads_0)[name = string("attention_0_q_rotate_half_split")]; fp16 attention_0_q_rotate_half_neg_y_0 = const()[name = string("attention_0_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_0_q_rotate_half_neg = mul(x = attention_0_q_rotate_half_split_1, y = attention_0_q_rotate_half_neg_y_0)[name = string("attention_0_q_rotate_half_neg")]; int32 attention_0_q_rotate_half_concat_axis_0 = const()[name = string("attention_0_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_0_q_rotate_half_concat_interleave_0 = const()[name = string("attention_0_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_0_q_rotate_half_concat = concat(axis = attention_0_q_rotate_half_concat_axis_0, interleave = attention_0_q_rotate_half_concat_interleave_0, values = (attention_0_q_rotate_half_neg, attention_0_q_rotate_half_split_0))[name = string("attention_0_q_rotate_half_concat")]; tensor attention_0_q_rope_rhs_mult = mul(x = attention_0_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_0_q_rope_rhs_mult")]; tensor attention_0_q_rope = add(x = attention_0_q_rope_lhs_mult, y = attention_0_q_rope_rhs_mult)[name = string("attention_0_q_rope")]; tensor attention_0_k_rope_lhs_mult = mul(x = attention_0_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_0_k_rope_lhs_mult")]; int32 attention_0_k_rotate_half_split_num_splits_0 = const()[name = string("attention_0_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_0_k_rotate_half_split_axis_0 = const()[name = string("attention_0_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_0_k_rotate_half_split_0, tensor attention_0_k_rotate_half_split_1 = split(axis = attention_0_k_rotate_half_split_axis_0, num_splits = attention_0_k_rotate_half_split_num_splits_0, x = attention_0_split_qkv_heads_1)[name = string("attention_0_k_rotate_half_split")]; fp16 attention_0_k_rotate_half_neg_y_0 = const()[name = string("attention_0_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_0_k_rotate_half_neg = mul(x = attention_0_k_rotate_half_split_1, y = attention_0_k_rotate_half_neg_y_0)[name = string("attention_0_k_rotate_half_neg")]; int32 attention_0_k_rotate_half_concat_axis_0 = const()[name = string("attention_0_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_0_k_rotate_half_concat_interleave_0 = const()[name = string("attention_0_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_0_k_rotate_half_concat = concat(axis = attention_0_k_rotate_half_concat_axis_0, 
interleave = attention_0_k_rotate_half_concat_interleave_0, values = (attention_0_k_rotate_half_neg, attention_0_k_rotate_half_split_0))[name = string("attention_0_k_rotate_half_concat")]; tensor attention_0_k_rope_rhs_mult = mul(x = attention_0_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_0_k_rope_rhs_mult")]; tensor attention_0_k_rope = add(x = attention_0_k_rope_lhs_mult, y = attention_0_k_rope_rhs_mult)[name = string("attention_0_k_rope")]; int32 attention_0_q_splits_axis_0 = const()[name = string("attention_0_q_splits_axis_0"), val = int32(1)]; int32 attention_0_q_splits_num_splits_0 = const()[name = string("attention_0_q_splits_num_splits_0"), val = int32(2)]; tensor attention_0_q_splits_0, tensor attention_0_q_splits_1 = split(axis = attention_0_q_splits_axis_0, num_splits = attention_0_q_splits_num_splits_0, x = attention_0_q_rope)[name = string("attention_0_q_splits")]; tensor attention_0_update_begin_0_values0_0 = const()[name = string("attention_0_update_begin_0_values0_0"), val = tensor([0])]; tensor attention_0_update_begin_0_values1_0 = const()[name = string("attention_0_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_0_update_begin_0_values3_0 = const()[name = string("attention_0_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_0_update_begin_0_axis_0 = const()[name = string("attention_0_update_begin_0_axis_0"), val = int32(0)]; bool attention_0_update_begin_0_interleave_0 = const()[name = string("attention_0_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_0_update_begin_0 = concat(axis = attention_0_update_begin_0_axis_0, interleave = attention_0_update_begin_0_interleave_0, values = (attention_0_update_begin_0_values0_0, attention_0_update_begin_0_values1_0, query_pos1, attention_0_update_begin_0_values3_0))[name = string("attention_0_update_begin_0")]; tensor attention_0_update_end_0_values0_0 = const()[name = string("attention_0_update_end_0_values0_0"), val = tensor([1])]; tensor attention_0_update_end_0_values1_0 = const()[name = string("attention_0_update_end_0_values1_0"), val = tensor([2])]; tensor attention_0_update_end_0_values3_0 = const()[name = string("attention_0_update_end_0_values3_0"), val = tensor([64])]; int32 attention_0_update_end_0_axis_0 = const()[name = string("attention_0_update_end_0_axis_0"), val = int32(0)]; bool attention_0_update_end_0_interleave_0 = const()[name = string("attention_0_update_end_0_interleave_0"), val = bool(false)]; tensor attention_0_update_end_0 = concat(axis = attention_0_update_end_0_axis_0, interleave = attention_0_update_end_0_interleave_0, values = (attention_0_update_end_0_values0_0, attention_0_update_end_0_values1_0, end_pos_0, attention_0_update_end_0_values3_0))[name = string("attention_0_update_end_0")]; tensor attention_0_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_0_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_0_updated_key_cache_0 = slice_update(begin = attention_0_update_begin_0, end = attention_0_update_end_0, squeeze_mask = attention_0_updated_key_cache_0_squeeze_mask_0, update = attention_0_k_rope, x = read_state_0)[name = string("attention_0_updated_key_cache_0")]; write_state(data = attention_0_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_48_write_state")]; tensor coreml_update_state_0 = read_state(input = key_cache_state)[name = string("coreml_update_state_48")]; tensor attention_0_key_cache_begin_0 = const()[name = 
string("attention_0_key_cache_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_0_key_cache_end_0 = const()[name = string("attention_0_key_cache_end_0"), val = tensor([1, 2, 512, 64])]; tensor attention_0_key_cache_squeeze_mask_0 = const()[name = string("attention_0_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_0_key_cache = slice_by_index(begin = attention_0_key_cache_begin_0, end = attention_0_key_cache_end_0, squeeze_mask = attention_0_key_cache_squeeze_mask_0, x = coreml_update_state_0)[name = string("attention_0_key_cache")]; int32 attention_0_key_cache_head_axis_0 = const()[name = string("attention_0_key_cache_head_axis_0"), val = int32(1)]; int32 attention_0_key_cache_head_num_splits_0 = const()[name = string("attention_0_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_0_key_cache_head_0, tensor attention_0_key_cache_head_1 = split(axis = attention_0_key_cache_head_axis_0, num_splits = attention_0_key_cache_head_num_splits_0, x = attention_0_key_cache)[name = string("attention_0_key_cache_head")]; tensor attention_0_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_0_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_0_updated_value_cache_0 = slice_update(begin = attention_0_update_begin_0, end = attention_0_update_end_0, squeeze_mask = attention_0_updated_value_cache_0_squeeze_mask_0, update = attention_0_split_qkv_heads_2, x = read_state_1)[name = string("attention_0_updated_value_cache_0")]; write_state(data = attention_0_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_49_write_state")]; tensor coreml_update_state_1 = read_state(input = value_cache_state)[name = string("coreml_update_state_49")]; tensor attention_0_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_0_slice_current_layer_value_cache_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_0_slice_current_layer_value_cache_end_0 = const()[name = string("attention_0_slice_current_layer_value_cache_end_0"), val = tensor([1, 2, 512, 64])]; tensor attention_0_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_0_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_0_slice_current_layer_value_cache = slice_by_index(begin = attention_0_slice_current_layer_value_cache_begin_0, end = attention_0_slice_current_layer_value_cache_end_0, squeeze_mask = attention_0_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_1)[name = string("attention_0_slice_current_layer_value_cache")]; int32 attention_0_slice_value_cache_heads_axis_0 = const()[name = string("attention_0_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_0_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_0_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_0_slice_value_cache_heads_0, tensor attention_0_slice_value_cache_heads_1 = split(axis = attention_0_slice_value_cache_heads_axis_0, num_splits = attention_0_slice_value_cache_heads_num_splits_0, x = attention_0_slice_current_layer_value_cache)[name = string("attention_0_slice_value_cache_heads")]; bool attention_0_scores_0_transpose_y_0 = const()[name = string("attention_0_scores_0_transpose_y_0"), val = bool(true)]; bool attention_0_scores_0_transpose_x_0 = const()[name = string("attention_0_scores_0_transpose_x_0"), val = bool(false)]; tensor 
attention_0_scores_0 = matmul(transpose_x = attention_0_scores_0_transpose_x_0, transpose_y = attention_0_scores_0_transpose_y_0, x = attention_0_key_cache_head_0, y = attention_0_q_splits_0)[name = string("attention_0_scores_0")]; fp16 attention_0_scaled_scores_0_y_0 = const()[name = string("attention_0_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_0_scaled_scores_0 = mul(x = attention_0_scores_0, y = attention_0_scaled_scores_0_y_0)[name = string("attention_0_scaled_scores_0")]; tensor transpose_0 = transpose(perm = transpose_0_perm_0, x = mask_gather)[name = string("transpose_50")]; tensor attention_0_masked_scaled_scores_0 = add(x = attention_0_scaled_scores_0, y = transpose_0)[name = string("attention_0_masked_scaled_scores_0")]; int32 softmax_0_axis_0 = const()[name = string("softmax_0_axis_0"), val = int32(-2)]; tensor softmax_0 = softmax(axis = softmax_0_axis_0, x = attention_0_masked_scaled_scores_0)[name = string("softmax_0")]; bool attention_0_attention_0_transpose_x_0 = const()[name = string("attention_0_attention_0_transpose_x_0"), val = bool(true)]; bool attention_0_attention_0_transpose_y_0 = const()[name = string("attention_0_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_0_attention_0 = matmul(transpose_x = attention_0_attention_0_transpose_x_0, transpose_y = attention_0_attention_0_transpose_y_0, x = softmax_0, y = attention_0_slice_value_cache_heads_0)[name = string("attention_0_attention_0")]; bool attention_0_scores_1_transpose_y_0 = const()[name = string("attention_0_scores_1_transpose_y_0"), val = bool(true)]; bool attention_0_scores_1_transpose_x_0 = const()[name = string("attention_0_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_0_scores_1 = matmul(transpose_x = attention_0_scores_1_transpose_x_0, transpose_y = attention_0_scores_1_transpose_y_0, x = attention_0_key_cache_head_1, y = attention_0_q_splits_1)[name = string("attention_0_scores_1")]; fp16 attention_0_scaled_scores_1_y_0 = const()[name = string("attention_0_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_0_scaled_scores_1 = mul(x = attention_0_scores_1, y = attention_0_scaled_scores_1_y_0)[name = string("attention_0_scaled_scores_1")]; tensor attention_0_masked_scaled_scores_1 = add(x = attention_0_scaled_scores_1, y = transpose_0)[name = string("attention_0_masked_scaled_scores_1")]; int32 softmax_1_axis_0 = const()[name = string("softmax_1_axis_0"), val = int32(-2)]; tensor softmax_1 = softmax(axis = softmax_1_axis_0, x = attention_0_masked_scaled_scores_1)[name = string("softmax_1")]; bool attention_0_attention_1_transpose_x_0 = const()[name = string("attention_0_attention_1_transpose_x_0"), val = bool(true)]; bool attention_0_attention_1_transpose_y_0 = const()[name = string("attention_0_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_0_attention_1 = matmul(transpose_x = attention_0_attention_1_transpose_x_0, transpose_y = attention_0_attention_1_transpose_y_0, x = softmax_1, y = attention_0_slice_value_cache_heads_1)[name = string("attention_0_attention_1")]; int32 attention_0_concat_attention_all_heads_axis_0 = const()[name = string("attention_0_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_0_concat_attention_all_heads_interleave_0 = const()[name = string("attention_0_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_0_concat_attention_all_heads = concat(axis = attention_0_concat_attention_all_heads_axis_0, interleave = 
attention_0_concat_attention_all_heads_interleave_0, values = (attention_0_attention_0, attention_0_attention_1))[name = string("attention_0_concat_attention_all_heads")]; tensor attention_0_channels_first_retransposed_perm_0 = const()[name = string("attention_0_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_0_reshaped_shape_0 = const()[name = string("attention_0_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_0_channels_first_retransposed = transpose(perm = attention_0_channels_first_retransposed_perm_0, x = attention_0_concat_attention_all_heads)[name = string("transpose_47")]; tensor attention_0_reshaped = reshape(shape = attention_0_reshaped_shape_0, x = attention_0_channels_first_retransposed)[name = string("attention_0_reshaped")]; tensor attention_0_outproj_weight_0 = const()[name = string("attention_0_outproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274993664)))]; tensor attention_0_outproj_strides_0 = const()[name = string("attention_0_outproj_strides_0"), val = tensor([1])]; string attention_0_outproj_pad_type_0 = const()[name = string("attention_0_outproj_pad_type_0"), val = string("valid")]; tensor attention_0_outproj_pad_0 = const()[name = string("attention_0_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_0_outproj_dilations_0 = const()[name = string("attention_0_outproj_dilations_0"), val = tensor([1])]; int32 attention_0_outproj_groups_0 = const()[name = string("attention_0_outproj_groups_0"), val = int32(1)]; tensor attention_0_outproj = conv(dilations = attention_0_outproj_dilations_0, groups = attention_0_outproj_groups_0, pad = attention_0_outproj_pad_0, pad_type = attention_0_outproj_pad_type_0, strides = attention_0_outproj_strides_0, weight = attention_0_outproj_weight_0, x = attention_0_reshaped)[name = string("attention_0_outproj")]; tensor block_0_residual_1 = add(x = input_embeddings_channels_first, y = attention_0_outproj)[name = string("block_0_residual_1")]; tensor block_0_ffn_rmsnorm_abs = abs(x = block_0_residual_1)[name = string("block_0_ffn_rmsnorm_abs")]; tensor block_0_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_0_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_0_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_0_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_0_ffn_rmsnorm_maxval = reduce_max(axes = block_0_ffn_rmsnorm_maxval_axes_0, keep_dims = block_0_ffn_rmsnorm_maxval_keep_dims_0, x = block_0_ffn_rmsnorm_abs)[name = string("block_0_ffn_rmsnorm_maxval")]; fp16 block_0_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_0_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_0_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_0_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_0_ffn_rmsnorm_maxval_clipped = clip(alpha = block_0_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_0_ffn_rmsnorm_maxval_clipped_beta_0, x = block_0_ffn_rmsnorm_maxval)[name = string("block_0_ffn_rmsnorm_maxval_clipped")]; tensor block_0_ffn_rmsnorm_scaled = real_div(x = block_0_residual_1, y = block_0_ffn_rmsnorm_maxval_clipped)[name = string("block_0_ffn_rmsnorm_scaled")]; tensor block_0_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_0_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_0_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_0_ffn_rmsnorm_squared_sum_keep_dims_0"), val = 
bool(true)]; tensor block_0_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_0_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_0_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_0_ffn_rmsnorm_scaled)[name = string("block_0_ffn_rmsnorm_squared_sum")]; fp16 block_0_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_0_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_0_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_0_ffn_rmsnorm_rsqrt_epsilon_0, x = block_0_ffn_rmsnorm_squared_sum)[name = string("block_0_ffn_rmsnorm_rsqrt")]; fp16 block_0_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_0_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_0_ffn_rmsnorm_dim_scaled = mul(x = block_0_ffn_rmsnorm_scaled, y = block_0_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_0_ffn_rmsnorm_dim_scaled")]; tensor block_0_ffn_rmsnorm_normalized = mul(x = block_0_ffn_rmsnorm_dim_scaled, y = block_0_ffn_rmsnorm_rsqrt)[name = string("block_0_ffn_rmsnorm_normalized")]; tensor block_0_ffn_rmsnorm_y_0 = const()[name = string("block_0_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276599360)))]; tensor block_0_ffn_rmsnorm = mul(x = block_0_ffn_rmsnorm_normalized, y = block_0_ffn_rmsnorm_y_0)[name = string("block_0_ffn_rmsnorm")]; tensor block_0_ffn_inproj_weight_0 = const()[name = string("block_0_ffn_inproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(276601216)))]; tensor block_0_ffn_inproj_strides_0 = const()[name = string("block_0_ffn_inproj_strides_0"), val = tensor([1])]; string block_0_ffn_inproj_pad_type_0 = const()[name = string("block_0_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_0_ffn_inproj_pad_0 = const()[name = string("block_0_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_0_ffn_inproj_dilations_0 = const()[name = string("block_0_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_0_ffn_inproj_groups_0 = const()[name = string("block_0_ffn_inproj_groups_0"), val = int32(1)]; tensor block_0_ffn_inproj = conv(dilations = block_0_ffn_inproj_dilations_0, groups = block_0_ffn_inproj_groups_0, pad = block_0_ffn_inproj_pad_0, pad_type = block_0_ffn_inproj_pad_type_0, strides = block_0_ffn_inproj_strides_0, weight = block_0_ffn_inproj_weight_0, x = block_0_ffn_rmsnorm)[name = string("block_0_ffn_inproj")]; tensor block_0_ffn_g_weight_0 = const()[name = string("block_0_ffn_g_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285317568)))]; tensor block_0_ffn_g_strides_0 = const()[name = string("block_0_ffn_g_strides_0"), val = tensor([1])]; string block_0_ffn_g_pad_type_0 = const()[name = string("block_0_ffn_g_pad_type_0"), val = string("valid")]; tensor block_0_ffn_g_pad_0 = const()[name = string("block_0_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_0_ffn_g_dilations_0 = const()[name = string("block_0_ffn_g_dilations_0"), val = tensor([1])]; int32 block_0_ffn_g_groups_0 = const()[name = string("block_0_ffn_g_groups_0"), val = int32(1)]; tensor block_0_ffn_g = conv(dilations = block_0_ffn_g_dilations_0, groups = block_0_ffn_g_groups_0, pad = block_0_ffn_g_pad_0, pad_type = block_0_ffn_g_pad_type_0, strides = block_0_ffn_g_strides_0, weight = block_0_ffn_g_weight_0, x = block_0_ffn_rmsnorm)[name = string("block_0_ffn_g")]; tensor block_0_ffn_g_activation = silu(x = block_0_ffn_g)[name = string("block_0_ffn_g_activation")]; tensor block_0_ffn_x_gated = mul(x = 
block_0_ffn_inproj, y = block_0_ffn_g_activation)[name = string("block_0_ffn_x_gated")]; tensor block_0_ffn_outproj_weight_0 = const()[name = string("block_0_ffn_outproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(294033920)))]; tensor block_0_ffn_outproj_strides_0 = const()[name = string("block_0_ffn_outproj_strides_0"), val = tensor([1])]; string block_0_ffn_outproj_pad_type_0 = const()[name = string("block_0_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_0_ffn_outproj_pad_0 = const()[name = string("block_0_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_0_ffn_outproj_dilations_0 = const()[name = string("block_0_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_0_ffn_outproj_groups_0 = const()[name = string("block_0_ffn_outproj_groups_0"), val = int32(1)]; tensor block_0_ffn_outproj = conv(dilations = block_0_ffn_outproj_dilations_0, groups = block_0_ffn_outproj_groups_0, pad = block_0_ffn_outproj_pad_0, pad_type = block_0_ffn_outproj_pad_type_0, strides = block_0_ffn_outproj_strides_0, weight = block_0_ffn_outproj_weight_0, x = block_0_ffn_x_gated)[name = string("block_0_ffn_outproj")]; tensor block_0_residual_2 = add(x = block_0_ffn_outproj, y = block_0_residual_1)[name = string("block_0_residual_2")]; tensor block_1_attention_rmsnorm_abs = abs(x = block_0_residual_2)[name = string("block_1_attention_rmsnorm_abs")]; tensor block_1_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_1_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_1_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_1_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_1_attention_rmsnorm_maxval = reduce_max(axes = block_1_attention_rmsnorm_maxval_axes_0, keep_dims = block_1_attention_rmsnorm_maxval_keep_dims_0, x = block_1_attention_rmsnorm_abs)[name = string("block_1_attention_rmsnorm_maxval")]; fp16 block_1_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_1_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_1_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_1_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_1_attention_rmsnorm_maxval_clipped = clip(alpha = block_1_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_1_attention_rmsnorm_maxval_clipped_beta_0, x = block_1_attention_rmsnorm_maxval)[name = string("block_1_attention_rmsnorm_maxval_clipped")]; tensor block_1_attention_rmsnorm_scaled = real_div(x = block_0_residual_2, y = block_1_attention_rmsnorm_maxval_clipped)[name = string("block_1_attention_rmsnorm_scaled")]; tensor block_1_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_1_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_1_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_1_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_1_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_1_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_1_attention_rmsnorm_squared_sum_keep_dims_0, x = block_1_attention_rmsnorm_scaled)[name = string("block_1_attention_rmsnorm_squared_sum")]; fp16 block_1_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_1_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_1_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_1_attention_rmsnorm_rsqrt_epsilon_0, x = block_1_attention_rmsnorm_squared_sum)[name 
= string("block_1_attention_rmsnorm_rsqrt")]; fp16 block_1_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_1_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_1_attention_rmsnorm_dim_scaled = mul(x = block_1_attention_rmsnorm_scaled, y = block_1_attention_rmsnorm_dim_scaled_y_0)[name = string("block_1_attention_rmsnorm_dim_scaled")]; tensor block_1_attention_rmsnorm_normalized = mul(x = block_1_attention_rmsnorm_dim_scaled, y = block_1_attention_rmsnorm_rsqrt)[name = string("block_1_attention_rmsnorm_normalized")]; tensor block_1_attention_rmsnorm_y_0 = const()[name = string("block_1_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302750272)))]; tensor block_1_attention_rmsnorm = mul(x = block_1_attention_rmsnorm_normalized, y = block_1_attention_rmsnorm_y_0)[name = string("block_1_attention_rmsnorm")]; tensor attention_1_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(302752128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303526336))))[name = string("attention_1_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_0 = constexpr_blockwise_shift_scale(data = attention_1_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303563264))))[name = string("constexpr_blockwise_shift_scale_0")]; tensor attention_1_qkvproj_bias_0 = const()[name = string("attention_1_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303565632)))]; tensor attention_1_qkvproj_strides_0 = const()[name = string("attention_1_qkvproj_strides_0"), val = tensor([1])]; string attention_1_qkvproj_pad_type_0 = const()[name = string("attention_1_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_1_qkvproj_pad_0 = const()[name = string("attention_1_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_1_qkvproj_dilations_0 = const()[name = string("attention_1_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_1_qkvproj_groups_0 = const()[name = string("attention_1_qkvproj_groups_0"), val = int32(1)]; tensor attention_1_qkvproj = conv(bias = attention_1_qkvproj_bias_0, dilations = attention_1_qkvproj_dilations_0, groups = attention_1_qkvproj_groups_0, pad = attention_1_qkvproj_pad_0, pad_type = attention_1_qkvproj_pad_type_0, strides = attention_1_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_0, x = block_1_attention_rmsnorm)[name = string("attention_1_qkvproj")]; tensor attention_1_head_reshape_shape_0 = const()[name = string("attention_1_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_1_head_reshape = reshape(shape = attention_1_head_reshape_shape_0, x = attention_1_qkvproj)[name = string("attention_1_head_reshape")]; tensor attention_1_head_transpose_perm_0 = const()[name = string("attention_1_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_1_split_qkv_heads_axis_0 = const()[name = string("attention_1_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_1_split_qkv_heads_split_sizes_0 = const()[name = string("attention_1_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_1_head_transpose = transpose(perm = attention_1_head_transpose_perm_0, x = attention_1_head_reshape)[name = string("transpose_46")]; tensor 
attention_1_split_qkv_heads_0, tensor attention_1_split_qkv_heads_1, tensor attention_1_split_qkv_heads_2 = split(axis = attention_1_split_qkv_heads_axis_0, split_sizes = attention_1_split_qkv_heads_split_sizes_0, x = attention_1_head_transpose)[name = string("attention_1_split_qkv_heads")]; tensor attention_1_q_rope_lhs_mult = mul(x = attention_1_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_1_q_rope_lhs_mult")]; int32 attention_1_q_rotate_half_split_num_splits_0 = const()[name = string("attention_1_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_1_q_rotate_half_split_axis_0 = const()[name = string("attention_1_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_1_q_rotate_half_split_0, tensor attention_1_q_rotate_half_split_1 = split(axis = attention_1_q_rotate_half_split_axis_0, num_splits = attention_1_q_rotate_half_split_num_splits_0, x = attention_1_split_qkv_heads_0)[name = string("attention_1_q_rotate_half_split")]; fp16 attention_1_q_rotate_half_neg_y_0 = const()[name = string("attention_1_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_1_q_rotate_half_neg = mul(x = attention_1_q_rotate_half_split_1, y = attention_1_q_rotate_half_neg_y_0)[name = string("attention_1_q_rotate_half_neg")]; int32 attention_1_q_rotate_half_concat_axis_0 = const()[name = string("attention_1_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_1_q_rotate_half_concat_interleave_0 = const()[name = string("attention_1_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_1_q_rotate_half_concat = concat(axis = attention_1_q_rotate_half_concat_axis_0, interleave = attention_1_q_rotate_half_concat_interleave_0, values = (attention_1_q_rotate_half_neg, attention_1_q_rotate_half_split_0))[name = string("attention_1_q_rotate_half_concat")]; tensor attention_1_q_rope_rhs_mult = mul(x = attention_1_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_1_q_rope_rhs_mult")]; tensor attention_1_q_rope = add(x = attention_1_q_rope_lhs_mult, y = attention_1_q_rope_rhs_mult)[name = string("attention_1_q_rope")]; tensor attention_1_k_rope_lhs_mult = mul(x = attention_1_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_1_k_rope_lhs_mult")]; int32 attention_1_k_rotate_half_split_num_splits_0 = const()[name = string("attention_1_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_1_k_rotate_half_split_axis_0 = const()[name = string("attention_1_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_1_k_rotate_half_split_0, tensor attention_1_k_rotate_half_split_1 = split(axis = attention_1_k_rotate_half_split_axis_0, num_splits = attention_1_k_rotate_half_split_num_splits_0, x = attention_1_split_qkv_heads_1)[name = string("attention_1_k_rotate_half_split")]; fp16 attention_1_k_rotate_half_neg_y_0 = const()[name = string("attention_1_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_1_k_rotate_half_neg = mul(x = attention_1_k_rotate_half_split_1, y = attention_1_k_rotate_half_neg_y_0)[name = string("attention_1_k_rotate_half_neg")]; int32 attention_1_k_rotate_half_concat_axis_0 = const()[name = string("attention_1_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_1_k_rotate_half_concat_interleave_0 = const()[name = string("attention_1_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_1_k_rotate_half_concat = concat(axis = attention_1_k_rotate_half_concat_axis_0, interleave = attention_1_k_rotate_half_concat_interleave_0, values = 
(attention_1_k_rotate_half_neg, attention_1_k_rotate_half_split_0))[name = string("attention_1_k_rotate_half_concat")]; tensor attention_1_k_rope_rhs_mult = mul(x = attention_1_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_1_k_rope_rhs_mult")]; tensor attention_1_k_rope = add(x = attention_1_k_rope_lhs_mult, y = attention_1_k_rope_rhs_mult)[name = string("attention_1_k_rope")]; int32 attention_1_q_splits_axis_0 = const()[name = string("attention_1_q_splits_axis_0"), val = int32(1)]; int32 attention_1_q_splits_num_splits_0 = const()[name = string("attention_1_q_splits_num_splits_0"), val = int32(2)]; tensor attention_1_q_splits_0, tensor attention_1_q_splits_1 = split(axis = attention_1_q_splits_axis_0, num_splits = attention_1_q_splits_num_splits_0, x = attention_1_q_rope)[name = string("attention_1_q_splits")]; tensor attention_1_update_begin_0_values0_0 = const()[name = string("attention_1_update_begin_0_values0_0"), val = tensor([1])]; tensor attention_1_update_begin_0_values1_0 = const()[name = string("attention_1_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_1_update_begin_0_values3_0 = const()[name = string("attention_1_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_1_update_begin_0_axis_0 = const()[name = string("attention_1_update_begin_0_axis_0"), val = int32(0)]; bool attention_1_update_begin_0_interleave_0 = const()[name = string("attention_1_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_1_update_begin_0 = concat(axis = attention_1_update_begin_0_axis_0, interleave = attention_1_update_begin_0_interleave_0, values = (attention_1_update_begin_0_values0_0, attention_1_update_begin_0_values1_0, query_pos1, attention_1_update_begin_0_values3_0))[name = string("attention_1_update_begin_0")]; tensor attention_1_update_end_0_values0_0 = const()[name = string("attention_1_update_end_0_values0_0"), val = tensor([2])]; tensor attention_1_update_end_0_values1_0 = const()[name = string("attention_1_update_end_0_values1_0"), val = tensor([2])]; tensor attention_1_update_end_0_values3_0 = const()[name = string("attention_1_update_end_0_values3_0"), val = tensor([64])]; int32 attention_1_update_end_0_axis_0 = const()[name = string("attention_1_update_end_0_axis_0"), val = int32(0)]; bool attention_1_update_end_0_interleave_0 = const()[name = string("attention_1_update_end_0_interleave_0"), val = bool(false)]; tensor attention_1_update_end_0 = concat(axis = attention_1_update_end_0_axis_0, interleave = attention_1_update_end_0_interleave_0, values = (attention_1_update_end_0_values0_0, attention_1_update_end_0_values1_0, end_pos_0, attention_1_update_end_0_values3_0))[name = string("attention_1_update_end_0")]; tensor attention_1_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_1_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_1_updated_key_cache_0 = slice_update(begin = attention_1_update_begin_0, end = attention_1_update_end_0, squeeze_mask = attention_1_updated_key_cache_0_squeeze_mask_0, update = attention_1_k_rope, x = coreml_update_state_0)[name = string("attention_1_updated_key_cache_0")]; write_state(data = attention_1_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_50_write_state")]; tensor coreml_update_state_2 = read_state(input = key_cache_state)[name = string("coreml_update_state_50")]; tensor attention_1_key_cache_begin_0 = const()[name = string("attention_1_key_cache_begin_0"), val = tensor([1, 0, 
0, 0])]; tensor attention_1_key_cache_end_0 = const()[name = string("attention_1_key_cache_end_0"), val = tensor([2, 2, 512, 64])]; tensor attention_1_key_cache_squeeze_mask_0 = const()[name = string("attention_1_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_1_key_cache = slice_by_index(begin = attention_1_key_cache_begin_0, end = attention_1_key_cache_end_0, squeeze_mask = attention_1_key_cache_squeeze_mask_0, x = coreml_update_state_2)[name = string("attention_1_key_cache")]; int32 attention_1_key_cache_head_axis_0 = const()[name = string("attention_1_key_cache_head_axis_0"), val = int32(1)]; int32 attention_1_key_cache_head_num_splits_0 = const()[name = string("attention_1_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_1_key_cache_head_0, tensor attention_1_key_cache_head_1 = split(axis = attention_1_key_cache_head_axis_0, num_splits = attention_1_key_cache_head_num_splits_0, x = attention_1_key_cache)[name = string("attention_1_key_cache_head")]; tensor attention_1_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_1_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_1_updated_value_cache_0 = slice_update(begin = attention_1_update_begin_0, end = attention_1_update_end_0, squeeze_mask = attention_1_updated_value_cache_0_squeeze_mask_0, update = attention_1_split_qkv_heads_2, x = coreml_update_state_1)[name = string("attention_1_updated_value_cache_0")]; write_state(data = attention_1_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_51_write_state")]; tensor coreml_update_state_3 = read_state(input = value_cache_state)[name = string("coreml_update_state_51")]; tensor attention_1_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_1_slice_current_layer_value_cache_begin_0"), val = tensor([1, 0, 0, 0])]; tensor attention_1_slice_current_layer_value_cache_end_0 = const()[name = string("attention_1_slice_current_layer_value_cache_end_0"), val = tensor([2, 2, 512, 64])]; tensor attention_1_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_1_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_1_slice_current_layer_value_cache = slice_by_index(begin = attention_1_slice_current_layer_value_cache_begin_0, end = attention_1_slice_current_layer_value_cache_end_0, squeeze_mask = attention_1_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_3)[name = string("attention_1_slice_current_layer_value_cache")]; int32 attention_1_slice_value_cache_heads_axis_0 = const()[name = string("attention_1_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_1_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_1_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_1_slice_value_cache_heads_0, tensor attention_1_slice_value_cache_heads_1 = split(axis = attention_1_slice_value_cache_heads_axis_0, num_splits = attention_1_slice_value_cache_heads_num_splits_0, x = attention_1_slice_current_layer_value_cache)[name = string("attention_1_slice_value_cache_heads")]; bool attention_1_scores_0_transpose_y_0 = const()[name = string("attention_1_scores_0_transpose_y_0"), val = bool(true)]; bool attention_1_scores_0_transpose_x_0 = const()[name = string("attention_1_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_1_scores_0 = matmul(transpose_x = 
attention_1_scores_0_transpose_x_0, transpose_y = attention_1_scores_0_transpose_y_0, x = attention_1_key_cache_head_0, y = attention_1_q_splits_0)[name = string("attention_1_scores_0")]; fp16 attention_1_scaled_scores_0_y_0 = const()[name = string("attention_1_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_1_scaled_scores_0 = mul(x = attention_1_scores_0, y = attention_1_scaled_scores_0_y_0)[name = string("attention_1_scaled_scores_0")]; tensor attention_1_masked_scaled_scores_0 = add(x = attention_1_scaled_scores_0, y = transpose_0)[name = string("attention_1_masked_scaled_scores_0")]; int32 softmax_2_axis_0 = const()[name = string("softmax_2_axis_0"), val = int32(-2)]; tensor softmax_2 = softmax(axis = softmax_2_axis_0, x = attention_1_masked_scaled_scores_0)[name = string("softmax_2")]; bool attention_1_attention_0_transpose_x_0 = const()[name = string("attention_1_attention_0_transpose_x_0"), val = bool(true)]; bool attention_1_attention_0_transpose_y_0 = const()[name = string("attention_1_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_1_attention_0 = matmul(transpose_x = attention_1_attention_0_transpose_x_0, transpose_y = attention_1_attention_0_transpose_y_0, x = softmax_2, y = attention_1_slice_value_cache_heads_0)[name = string("attention_1_attention_0")]; bool attention_1_scores_1_transpose_y_0 = const()[name = string("attention_1_scores_1_transpose_y_0"), val = bool(true)]; bool attention_1_scores_1_transpose_x_0 = const()[name = string("attention_1_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_1_scores_1 = matmul(transpose_x = attention_1_scores_1_transpose_x_0, transpose_y = attention_1_scores_1_transpose_y_0, x = attention_1_key_cache_head_1, y = attention_1_q_splits_1)[name = string("attention_1_scores_1")]; fp16 attention_1_scaled_scores_1_y_0 = const()[name = string("attention_1_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_1_scaled_scores_1 = mul(x = attention_1_scores_1, y = attention_1_scaled_scores_1_y_0)[name = string("attention_1_scaled_scores_1")]; tensor attention_1_masked_scaled_scores_1 = add(x = attention_1_scaled_scores_1, y = transpose_0)[name = string("attention_1_masked_scaled_scores_1")]; int32 softmax_3_axis_0 = const()[name = string("softmax_3_axis_0"), val = int32(-2)]; tensor softmax_3 = softmax(axis = softmax_3_axis_0, x = attention_1_masked_scaled_scores_1)[name = string("softmax_3")]; bool attention_1_attention_1_transpose_x_0 = const()[name = string("attention_1_attention_1_transpose_x_0"), val = bool(true)]; bool attention_1_attention_1_transpose_y_0 = const()[name = string("attention_1_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_1_attention_1 = matmul(transpose_x = attention_1_attention_1_transpose_x_0, transpose_y = attention_1_attention_1_transpose_y_0, x = softmax_3, y = attention_1_slice_value_cache_heads_1)[name = string("attention_1_attention_1")]; int32 attention_1_concat_attention_all_heads_axis_0 = const()[name = string("attention_1_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_1_concat_attention_all_heads_interleave_0 = const()[name = string("attention_1_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_1_concat_attention_all_heads = concat(axis = attention_1_concat_attention_all_heads_axis_0, interleave = attention_1_concat_attention_all_heads_interleave_0, values = (attention_1_attention_0, attention_1_attention_1))[name = string("attention_1_concat_attention_all_heads")]; tensor 
attention_1_channels_first_retransposed_perm_0 = const()[name = string("attention_1_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_1_reshaped_shape_0 = const()[name = string("attention_1_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_1_channels_first_retransposed = transpose(perm = attention_1_channels_first_retransposed_perm_0, x = attention_1_concat_attention_all_heads)[name = string("transpose_45")]; tensor attention_1_reshaped = reshape(shape = attention_1_reshaped_shape_0, x = attention_1_channels_first_retransposed)[name = string("attention_1_reshaped")]; tensor attention_1_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(303568000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304170176))))[name = string("attention_1_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_1 = constexpr_blockwise_shift_scale(data = attention_1_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304198912))))[name = string("constexpr_blockwise_shift_scale_1")]; tensor attention_1_outproj_strides_0 = const()[name = string("attention_1_outproj_strides_0"), val = tensor([1])]; string attention_1_outproj_pad_type_0 = const()[name = string("attention_1_outproj_pad_type_0"), val = string("valid")]; tensor attention_1_outproj_pad_0 = const()[name = string("attention_1_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_1_outproj_dilations_0 = const()[name = string("attention_1_outproj_dilations_0"), val = tensor([1])]; int32 attention_1_outproj_groups_0 = const()[name = string("attention_1_outproj_groups_0"), val = int32(1)]; tensor attention_1_outproj = conv(dilations = attention_1_outproj_dilations_0, groups = attention_1_outproj_groups_0, pad = attention_1_outproj_pad_0, pad_type = attention_1_outproj_pad_type_0, strides = attention_1_outproj_strides_0, weight = constexpr_blockwise_shift_scale_1, x = attention_1_reshaped)[name = string("attention_1_outproj")]; tensor block_1_residual_1 = add(x = block_0_residual_2, y = attention_1_outproj)[name = string("block_1_residual_1")]; tensor block_1_ffn_rmsnorm_abs = abs(x = block_1_residual_1)[name = string("block_1_ffn_rmsnorm_abs")]; tensor block_1_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_1_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_1_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_1_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_1_ffn_rmsnorm_maxval = reduce_max(axes = block_1_ffn_rmsnorm_maxval_axes_0, keep_dims = block_1_ffn_rmsnorm_maxval_keep_dims_0, x = block_1_ffn_rmsnorm_abs)[name = string("block_1_ffn_rmsnorm_maxval")]; fp16 block_1_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_1_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_1_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_1_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_1_ffn_rmsnorm_maxval_clipped = clip(alpha = block_1_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_1_ffn_rmsnorm_maxval_clipped_beta_0, x = block_1_ffn_rmsnorm_maxval)[name = string("block_1_ffn_rmsnorm_maxval_clipped")]; tensor block_1_ffn_rmsnorm_scaled = real_div(x = block_1_residual_1, y = block_1_ffn_rmsnorm_maxval_clipped)[name = string("block_1_ffn_rmsnorm_scaled")]; tensor 
block_1_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_1_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_1_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_1_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_1_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_1_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_1_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_1_ffn_rmsnorm_scaled)[name = string("block_1_ffn_rmsnorm_squared_sum")]; fp16 block_1_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_1_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_1_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_1_ffn_rmsnorm_rsqrt_epsilon_0, x = block_1_ffn_rmsnorm_squared_sum)[name = string("block_1_ffn_rmsnorm_rsqrt")]; fp16 block_1_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_1_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_1_ffn_rmsnorm_dim_scaled = mul(x = block_1_ffn_rmsnorm_scaled, y = block_1_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_1_ffn_rmsnorm_dim_scaled")]; tensor block_1_ffn_rmsnorm_normalized = mul(x = block_1_ffn_rmsnorm_dim_scaled, y = block_1_ffn_rmsnorm_rsqrt)[name = string("block_1_ffn_rmsnorm_normalized")]; tensor block_1_ffn_rmsnorm_y_0 = const()[name = string("block_1_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304200768)))]; tensor block_1_ffn_rmsnorm = mul(x = block_1_ffn_rmsnorm_normalized, y = block_1_ffn_rmsnorm_y_0)[name = string("block_1_ffn_rmsnorm")]; tensor block_1_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304202624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307471296))))[name = string("block_1_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_2 = constexpr_blockwise_shift_scale(data = block_1_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307627008))))[name = string("constexpr_blockwise_shift_scale_2")]; tensor block_1_ffn_inproj_strides_0 = const()[name = string("block_1_ffn_inproj_strides_0"), val = tensor([1])]; string block_1_ffn_inproj_pad_type_0 = const()[name = string("block_1_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_1_ffn_inproj_pad_0 = const()[name = string("block_1_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_1_ffn_inproj_dilations_0 = const()[name = string("block_1_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_1_ffn_inproj_groups_0 = const()[name = string("block_1_ffn_inproj_groups_0"), val = int32(1)]; tensor block_1_ffn_inproj = conv(dilations = block_1_ffn_inproj_dilations_0, groups = block_1_ffn_inproj_groups_0, pad = block_1_ffn_inproj_pad_0, pad_type = block_1_ffn_inproj_pad_type_0, strides = block_1_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_2, x = block_1_ffn_rmsnorm)[name = string("block_1_ffn_inproj")]; tensor block_1_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(307636800))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(310905472))))[name = string("block_1_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_3 = constexpr_blockwise_shift_scale(data = block_1_ffn_g_weight_dequantization, 
scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311061184))))[name = string("constexpr_blockwise_shift_scale_3")]; tensor block_1_ffn_g_strides_0 = const()[name = string("block_1_ffn_g_strides_0"), val = tensor([1])]; string block_1_ffn_g_pad_type_0 = const()[name = string("block_1_ffn_g_pad_type_0"), val = string("valid")]; tensor block_1_ffn_g_pad_0 = const()[name = string("block_1_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_1_ffn_g_dilations_0 = const()[name = string("block_1_ffn_g_dilations_0"), val = tensor([1])]; int32 block_1_ffn_g_groups_0 = const()[name = string("block_1_ffn_g_groups_0"), val = int32(1)]; tensor block_1_ffn_g = conv(dilations = block_1_ffn_g_dilations_0, groups = block_1_ffn_g_groups_0, pad = block_1_ffn_g_pad_0, pad_type = block_1_ffn_g_pad_type_0, strides = block_1_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_3, x = block_1_ffn_rmsnorm)[name = string("block_1_ffn_g")]; tensor block_1_ffn_g_activation = silu(x = block_1_ffn_g)[name = string("block_1_ffn_g_activation")]; tensor block_1_ffn_x_gated = mul(x = block_1_ffn_inproj, y = block_1_ffn_g_activation)[name = string("block_1_ffn_x_gated")]; tensor block_1_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311070976))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314339648))))[name = string("block_1_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_4 = constexpr_blockwise_shift_scale(data = block_1_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314368384))))[name = string("constexpr_blockwise_shift_scale_4")]; tensor block_1_ffn_outproj_strides_0 = const()[name = string("block_1_ffn_outproj_strides_0"), val = tensor([1])]; string block_1_ffn_outproj_pad_type_0 = const()[name = string("block_1_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_1_ffn_outproj_pad_0 = const()[name = string("block_1_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_1_ffn_outproj_dilations_0 = const()[name = string("block_1_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_1_ffn_outproj_groups_0 = const()[name = string("block_1_ffn_outproj_groups_0"), val = int32(1)]; tensor block_1_ffn_outproj = conv(dilations = block_1_ffn_outproj_dilations_0, groups = block_1_ffn_outproj_groups_0, pad = block_1_ffn_outproj_pad_0, pad_type = block_1_ffn_outproj_pad_type_0, strides = block_1_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_4, x = block_1_ffn_x_gated)[name = string("block_1_ffn_outproj")]; tensor block_1_residual_2 = add(x = block_1_ffn_outproj, y = block_1_residual_1)[name = string("block_1_residual_2")]; tensor block_2_attention_rmsnorm_abs = abs(x = block_1_residual_2)[name = string("block_2_attention_rmsnorm_abs")]; tensor block_2_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_2_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_2_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_2_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_2_attention_rmsnorm_maxval = reduce_max(axes = block_2_attention_rmsnorm_maxval_axes_0, keep_dims = block_2_attention_rmsnorm_maxval_keep_dims_0, x = block_2_attention_rmsnorm_abs)[name = string("block_2_attention_rmsnorm_maxval")]; fp16 block_2_attention_rmsnorm_maxval_clipped_alpha_0 = 
const()[name = string("block_2_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_2_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_2_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_2_attention_rmsnorm_maxval_clipped = clip(alpha = block_2_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_2_attention_rmsnorm_maxval_clipped_beta_0, x = block_2_attention_rmsnorm_maxval)[name = string("block_2_attention_rmsnorm_maxval_clipped")]; tensor block_2_attention_rmsnorm_scaled = real_div(x = block_1_residual_2, y = block_2_attention_rmsnorm_maxval_clipped)[name = string("block_2_attention_rmsnorm_scaled")]; tensor block_2_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_2_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_2_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_2_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_2_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_2_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_2_attention_rmsnorm_squared_sum_keep_dims_0, x = block_2_attention_rmsnorm_scaled)[name = string("block_2_attention_rmsnorm_squared_sum")]; fp16 block_2_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_2_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_2_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_2_attention_rmsnorm_rsqrt_epsilon_0, x = block_2_attention_rmsnorm_squared_sum)[name = string("block_2_attention_rmsnorm_rsqrt")]; fp16 block_2_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_2_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_2_attention_rmsnorm_dim_scaled = mul(x = block_2_attention_rmsnorm_scaled, y = block_2_attention_rmsnorm_dim_scaled_y_0)[name = string("block_2_attention_rmsnorm_dim_scaled")]; tensor block_2_attention_rmsnorm_normalized = mul(x = block_2_attention_rmsnorm_dim_scaled, y = block_2_attention_rmsnorm_rsqrt)[name = string("block_2_attention_rmsnorm_normalized")]; tensor block_2_attention_rmsnorm_y_0 = const()[name = string("block_2_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314370240)))]; tensor block_2_attention_rmsnorm = mul(x = block_2_attention_rmsnorm_normalized, y = block_2_attention_rmsnorm_y_0)[name = string("block_2_attention_rmsnorm")]; tensor attention_2_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(314372096))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315146304))))[name = string("attention_2_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_5 = constexpr_blockwise_shift_scale(data = attention_2_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315183232))))[name = string("constexpr_blockwise_shift_scale_5")]; tensor attention_2_qkvproj_bias_0 = const()[name = string("attention_2_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315185600)))]; tensor attention_2_qkvproj_strides_0 = const()[name = string("attention_2_qkvproj_strides_0"), val = tensor([1])]; string attention_2_qkvproj_pad_type_0 = const()[name = string("attention_2_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_2_qkvproj_pad_0 = 
const()[name = string("attention_2_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_2_qkvproj_dilations_0 = const()[name = string("attention_2_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_2_qkvproj_groups_0 = const()[name = string("attention_2_qkvproj_groups_0"), val = int32(1)]; tensor attention_2_qkvproj = conv(bias = attention_2_qkvproj_bias_0, dilations = attention_2_qkvproj_dilations_0, groups = attention_2_qkvproj_groups_0, pad = attention_2_qkvproj_pad_0, pad_type = attention_2_qkvproj_pad_type_0, strides = attention_2_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_5, x = block_2_attention_rmsnorm)[name = string("attention_2_qkvproj")]; tensor attention_2_head_reshape_shape_0 = const()[name = string("attention_2_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_2_head_reshape = reshape(shape = attention_2_head_reshape_shape_0, x = attention_2_qkvproj)[name = string("attention_2_head_reshape")]; tensor attention_2_head_transpose_perm_0 = const()[name = string("attention_2_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_2_split_qkv_heads_axis_0 = const()[name = string("attention_2_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_2_split_qkv_heads_split_sizes_0 = const()[name = string("attention_2_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_2_head_transpose = transpose(perm = attention_2_head_transpose_perm_0, x = attention_2_head_reshape)[name = string("transpose_44")]; tensor attention_2_split_qkv_heads_0, tensor attention_2_split_qkv_heads_1, tensor attention_2_split_qkv_heads_2 = split(axis = attention_2_split_qkv_heads_axis_0, split_sizes = attention_2_split_qkv_heads_split_sizes_0, x = attention_2_head_transpose)[name = string("attention_2_split_qkv_heads")]; tensor attention_2_q_rope_lhs_mult = mul(x = attention_2_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_2_q_rope_lhs_mult")]; int32 attention_2_q_rotate_half_split_num_splits_0 = const()[name = string("attention_2_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_2_q_rotate_half_split_axis_0 = const()[name = string("attention_2_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_2_q_rotate_half_split_0, tensor attention_2_q_rotate_half_split_1 = split(axis = attention_2_q_rotate_half_split_axis_0, num_splits = attention_2_q_rotate_half_split_num_splits_0, x = attention_2_split_qkv_heads_0)[name = string("attention_2_q_rotate_half_split")]; fp16 attention_2_q_rotate_half_neg_y_0 = const()[name = string("attention_2_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_2_q_rotate_half_neg = mul(x = attention_2_q_rotate_half_split_1, y = attention_2_q_rotate_half_neg_y_0)[name = string("attention_2_q_rotate_half_neg")]; int32 attention_2_q_rotate_half_concat_axis_0 = const()[name = string("attention_2_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_2_q_rotate_half_concat_interleave_0 = const()[name = string("attention_2_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_2_q_rotate_half_concat = concat(axis = attention_2_q_rotate_half_concat_axis_0, interleave = attention_2_q_rotate_half_concat_interleave_0, values = (attention_2_q_rotate_half_neg, attention_2_q_rotate_half_split_0))[name = string("attention_2_q_rotate_half_concat")]; tensor attention_2_q_rope_rhs_mult = mul(x = attention_2_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_2_q_rope_rhs_mult")]; tensor attention_2_q_rope = add(x = 
attention_2_q_rope_lhs_mult, y = attention_2_q_rope_rhs_mult)[name = string("attention_2_q_rope")]; tensor attention_2_k_rope_lhs_mult = mul(x = attention_2_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_2_k_rope_lhs_mult")]; int32 attention_2_k_rotate_half_split_num_splits_0 = const()[name = string("attention_2_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_2_k_rotate_half_split_axis_0 = const()[name = string("attention_2_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_2_k_rotate_half_split_0, tensor attention_2_k_rotate_half_split_1 = split(axis = attention_2_k_rotate_half_split_axis_0, num_splits = attention_2_k_rotate_half_split_num_splits_0, x = attention_2_split_qkv_heads_1)[name = string("attention_2_k_rotate_half_split")]; fp16 attention_2_k_rotate_half_neg_y_0 = const()[name = string("attention_2_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_2_k_rotate_half_neg = mul(x = attention_2_k_rotate_half_split_1, y = attention_2_k_rotate_half_neg_y_0)[name = string("attention_2_k_rotate_half_neg")]; int32 attention_2_k_rotate_half_concat_axis_0 = const()[name = string("attention_2_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_2_k_rotate_half_concat_interleave_0 = const()[name = string("attention_2_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_2_k_rotate_half_concat = concat(axis = attention_2_k_rotate_half_concat_axis_0, interleave = attention_2_k_rotate_half_concat_interleave_0, values = (attention_2_k_rotate_half_neg, attention_2_k_rotate_half_split_0))[name = string("attention_2_k_rotate_half_concat")]; tensor attention_2_k_rope_rhs_mult = mul(x = attention_2_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_2_k_rope_rhs_mult")]; tensor attention_2_k_rope = add(x = attention_2_k_rope_lhs_mult, y = attention_2_k_rope_rhs_mult)[name = string("attention_2_k_rope")]; int32 attention_2_q_splits_axis_0 = const()[name = string("attention_2_q_splits_axis_0"), val = int32(1)]; int32 attention_2_q_splits_num_splits_0 = const()[name = string("attention_2_q_splits_num_splits_0"), val = int32(2)]; tensor attention_2_q_splits_0, tensor attention_2_q_splits_1 = split(axis = attention_2_q_splits_axis_0, num_splits = attention_2_q_splits_num_splits_0, x = attention_2_q_rope)[name = string("attention_2_q_splits")]; tensor attention_2_update_begin_0_values0_0 = const()[name = string("attention_2_update_begin_0_values0_0"), val = tensor([2])]; tensor attention_2_update_begin_0_values1_0 = const()[name = string("attention_2_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_2_update_begin_0_values3_0 = const()[name = string("attention_2_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_2_update_begin_0_axis_0 = const()[name = string("attention_2_update_begin_0_axis_0"), val = int32(0)]; bool attention_2_update_begin_0_interleave_0 = const()[name = string("attention_2_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_2_update_begin_0 = concat(axis = attention_2_update_begin_0_axis_0, interleave = attention_2_update_begin_0_interleave_0, values = (attention_2_update_begin_0_values0_0, attention_2_update_begin_0_values1_0, query_pos1, attention_2_update_begin_0_values3_0))[name = string("attention_2_update_begin_0")]; tensor attention_2_update_end_0_values0_0 = const()[name = string("attention_2_update_end_0_values0_0"), val = tensor([3])]; tensor attention_2_update_end_0_values1_0 = const()[name = 
string("attention_2_update_end_0_values1_0"), val = tensor([2])]; tensor attention_2_update_end_0_values3_0 = const()[name = string("attention_2_update_end_0_values3_0"), val = tensor([64])]; int32 attention_2_update_end_0_axis_0 = const()[name = string("attention_2_update_end_0_axis_0"), val = int32(0)]; bool attention_2_update_end_0_interleave_0 = const()[name = string("attention_2_update_end_0_interleave_0"), val = bool(false)]; tensor attention_2_update_end_0 = concat(axis = attention_2_update_end_0_axis_0, interleave = attention_2_update_end_0_interleave_0, values = (attention_2_update_end_0_values0_0, attention_2_update_end_0_values1_0, end_pos_0, attention_2_update_end_0_values3_0))[name = string("attention_2_update_end_0")]; tensor attention_2_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_2_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_2_updated_key_cache_0 = slice_update(begin = attention_2_update_begin_0, end = attention_2_update_end_0, squeeze_mask = attention_2_updated_key_cache_0_squeeze_mask_0, update = attention_2_k_rope, x = coreml_update_state_2)[name = string("attention_2_updated_key_cache_0")]; write_state(data = attention_2_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_52_write_state")]; tensor coreml_update_state_4 = read_state(input = key_cache_state)[name = string("coreml_update_state_52")]; tensor attention_2_key_cache_begin_0 = const()[name = string("attention_2_key_cache_begin_0"), val = tensor([2, 0, 0, 0])]; tensor attention_2_key_cache_end_0 = const()[name = string("attention_2_key_cache_end_0"), val = tensor([3, 2, 512, 64])]; tensor attention_2_key_cache_squeeze_mask_0 = const()[name = string("attention_2_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_2_key_cache = slice_by_index(begin = attention_2_key_cache_begin_0, end = attention_2_key_cache_end_0, squeeze_mask = attention_2_key_cache_squeeze_mask_0, x = coreml_update_state_4)[name = string("attention_2_key_cache")]; int32 attention_2_key_cache_head_axis_0 = const()[name = string("attention_2_key_cache_head_axis_0"), val = int32(1)]; int32 attention_2_key_cache_head_num_splits_0 = const()[name = string("attention_2_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_2_key_cache_head_0, tensor attention_2_key_cache_head_1 = split(axis = attention_2_key_cache_head_axis_0, num_splits = attention_2_key_cache_head_num_splits_0, x = attention_2_key_cache)[name = string("attention_2_key_cache_head")]; tensor attention_2_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_2_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_2_updated_value_cache_0 = slice_update(begin = attention_2_update_begin_0, end = attention_2_update_end_0, squeeze_mask = attention_2_updated_value_cache_0_squeeze_mask_0, update = attention_2_split_qkv_heads_2, x = coreml_update_state_3)[name = string("attention_2_updated_value_cache_0")]; write_state(data = attention_2_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_53_write_state")]; tensor coreml_update_state_5 = read_state(input = value_cache_state)[name = string("coreml_update_state_53")]; tensor attention_2_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_2_slice_current_layer_value_cache_begin_0"), val = tensor([2, 0, 0, 0])]; tensor attention_2_slice_current_layer_value_cache_end_0 = 
const()[name = string("attention_2_slice_current_layer_value_cache_end_0"), val = tensor([3, 2, 512, 64])]; tensor attention_2_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_2_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_2_slice_current_layer_value_cache = slice_by_index(begin = attention_2_slice_current_layer_value_cache_begin_0, end = attention_2_slice_current_layer_value_cache_end_0, squeeze_mask = attention_2_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_5)[name = string("attention_2_slice_current_layer_value_cache")]; int32 attention_2_slice_value_cache_heads_axis_0 = const()[name = string("attention_2_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_2_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_2_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_2_slice_value_cache_heads_0, tensor attention_2_slice_value_cache_heads_1 = split(axis = attention_2_slice_value_cache_heads_axis_0, num_splits = attention_2_slice_value_cache_heads_num_splits_0, x = attention_2_slice_current_layer_value_cache)[name = string("attention_2_slice_value_cache_heads")]; bool attention_2_scores_0_transpose_y_0 = const()[name = string("attention_2_scores_0_transpose_y_0"), val = bool(true)]; bool attention_2_scores_0_transpose_x_0 = const()[name = string("attention_2_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_2_scores_0 = matmul(transpose_x = attention_2_scores_0_transpose_x_0, transpose_y = attention_2_scores_0_transpose_y_0, x = attention_2_key_cache_head_0, y = attention_2_q_splits_0)[name = string("attention_2_scores_0")]; fp16 attention_2_scaled_scores_0_y_0 = const()[name = string("attention_2_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_2_scaled_scores_0 = mul(x = attention_2_scores_0, y = attention_2_scaled_scores_0_y_0)[name = string("attention_2_scaled_scores_0")]; tensor attention_2_masked_scaled_scores_0 = add(x = attention_2_scaled_scores_0, y = transpose_0)[name = string("attention_2_masked_scaled_scores_0")]; int32 softmax_4_axis_0 = const()[name = string("softmax_4_axis_0"), val = int32(-2)]; tensor softmax_4 = softmax(axis = softmax_4_axis_0, x = attention_2_masked_scaled_scores_0)[name = string("softmax_4")]; bool attention_2_attention_0_transpose_x_0 = const()[name = string("attention_2_attention_0_transpose_x_0"), val = bool(true)]; bool attention_2_attention_0_transpose_y_0 = const()[name = string("attention_2_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_2_attention_0 = matmul(transpose_x = attention_2_attention_0_transpose_x_0, transpose_y = attention_2_attention_0_transpose_y_0, x = softmax_4, y = attention_2_slice_value_cache_heads_0)[name = string("attention_2_attention_0")]; bool attention_2_scores_1_transpose_y_0 = const()[name = string("attention_2_scores_1_transpose_y_0"), val = bool(true)]; bool attention_2_scores_1_transpose_x_0 = const()[name = string("attention_2_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_2_scores_1 = matmul(transpose_x = attention_2_scores_1_transpose_x_0, transpose_y = attention_2_scores_1_transpose_y_0, x = attention_2_key_cache_head_1, y = attention_2_q_splits_1)[name = string("attention_2_scores_1")]; fp16 attention_2_scaled_scores_1_y_0 = const()[name = string("attention_2_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_2_scaled_scores_1 = mul(x = attention_2_scores_1, y = 
attention_2_scaled_scores_1_y_0)[name = string("attention_2_scaled_scores_1")]; tensor attention_2_masked_scaled_scores_1 = add(x = attention_2_scaled_scores_1, y = transpose_0)[name = string("attention_2_masked_scaled_scores_1")]; int32 softmax_5_axis_0 = const()[name = string("softmax_5_axis_0"), val = int32(-2)]; tensor softmax_5 = softmax(axis = softmax_5_axis_0, x = attention_2_masked_scaled_scores_1)[name = string("softmax_5")]; bool attention_2_attention_1_transpose_x_0 = const()[name = string("attention_2_attention_1_transpose_x_0"), val = bool(true)]; bool attention_2_attention_1_transpose_y_0 = const()[name = string("attention_2_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_2_attention_1 = matmul(transpose_x = attention_2_attention_1_transpose_x_0, transpose_y = attention_2_attention_1_transpose_y_0, x = softmax_5, y = attention_2_slice_value_cache_heads_1)[name = string("attention_2_attention_1")]; int32 attention_2_concat_attention_all_heads_axis_0 = const()[name = string("attention_2_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_2_concat_attention_all_heads_interleave_0 = const()[name = string("attention_2_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_2_concat_attention_all_heads = concat(axis = attention_2_concat_attention_all_heads_axis_0, interleave = attention_2_concat_attention_all_heads_interleave_0, values = (attention_2_attention_0, attention_2_attention_1))[name = string("attention_2_concat_attention_all_heads")]; tensor attention_2_channels_first_retransposed_perm_0 = const()[name = string("attention_2_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_2_reshaped_shape_0 = const()[name = string("attention_2_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_2_channels_first_retransposed = transpose(perm = attention_2_channels_first_retransposed_perm_0, x = attention_2_concat_attention_all_heads)[name = string("transpose_43")]; tensor attention_2_reshaped = reshape(shape = attention_2_reshaped_shape_0, x = attention_2_channels_first_retransposed)[name = string("attention_2_reshaped")]; tensor attention_2_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315187968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315790144))))[name = string("attention_2_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_6 = constexpr_blockwise_shift_scale(data = attention_2_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315818880))))[name = string("constexpr_blockwise_shift_scale_6")]; tensor attention_2_outproj_strides_0 = const()[name = string("attention_2_outproj_strides_0"), val = tensor([1])]; string attention_2_outproj_pad_type_0 = const()[name = string("attention_2_outproj_pad_type_0"), val = string("valid")]; tensor attention_2_outproj_pad_0 = const()[name = string("attention_2_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_2_outproj_dilations_0 = const()[name = string("attention_2_outproj_dilations_0"), val = tensor([1])]; int32 attention_2_outproj_groups_0 = const()[name = string("attention_2_outproj_groups_0"), val = int32(1)]; tensor attention_2_outproj = conv(dilations = attention_2_outproj_dilations_0, groups = attention_2_outproj_groups_0, pad = attention_2_outproj_pad_0, pad_type = 
attention_2_outproj_pad_type_0, strides = attention_2_outproj_strides_0, weight = constexpr_blockwise_shift_scale_6, x = attention_2_reshaped)[name = string("attention_2_outproj")]; tensor block_2_residual_1 = add(x = block_1_residual_2, y = attention_2_outproj)[name = string("block_2_residual_1")]; tensor block_2_ffn_rmsnorm_abs = abs(x = block_2_residual_1)[name = string("block_2_ffn_rmsnorm_abs")]; tensor block_2_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_2_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_2_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_2_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_2_ffn_rmsnorm_maxval = reduce_max(axes = block_2_ffn_rmsnorm_maxval_axes_0, keep_dims = block_2_ffn_rmsnorm_maxval_keep_dims_0, x = block_2_ffn_rmsnorm_abs)[name = string("block_2_ffn_rmsnorm_maxval")]; fp16 block_2_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_2_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_2_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_2_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_2_ffn_rmsnorm_maxval_clipped = clip(alpha = block_2_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_2_ffn_rmsnorm_maxval_clipped_beta_0, x = block_2_ffn_rmsnorm_maxval)[name = string("block_2_ffn_rmsnorm_maxval_clipped")]; tensor block_2_ffn_rmsnorm_scaled = real_div(x = block_2_residual_1, y = block_2_ffn_rmsnorm_maxval_clipped)[name = string("block_2_ffn_rmsnorm_scaled")]; tensor block_2_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_2_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_2_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_2_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_2_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_2_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_2_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_2_ffn_rmsnorm_scaled)[name = string("block_2_ffn_rmsnorm_squared_sum")]; fp16 block_2_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_2_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_2_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_2_ffn_rmsnorm_rsqrt_epsilon_0, x = block_2_ffn_rmsnorm_squared_sum)[name = string("block_2_ffn_rmsnorm_rsqrt")]; fp16 block_2_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_2_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_2_ffn_rmsnorm_dim_scaled = mul(x = block_2_ffn_rmsnorm_scaled, y = block_2_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_2_ffn_rmsnorm_dim_scaled")]; tensor block_2_ffn_rmsnorm_normalized = mul(x = block_2_ffn_rmsnorm_dim_scaled, y = block_2_ffn_rmsnorm_rsqrt)[name = string("block_2_ffn_rmsnorm_normalized")]; tensor block_2_ffn_rmsnorm_y_0 = const()[name = string("block_2_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315820736)))]; tensor block_2_ffn_rmsnorm = mul(x = block_2_ffn_rmsnorm_normalized, y = block_2_ffn_rmsnorm_y_0)[name = string("block_2_ffn_rmsnorm")]; tensor block_2_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315822592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319091264))))[name = string("block_2_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_7 = constexpr_blockwise_shift_scale(data = 
block_2_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319246976))))[name = string("constexpr_blockwise_shift_scale_7")]; tensor block_2_ffn_inproj_strides_0 = const()[name = string("block_2_ffn_inproj_strides_0"), val = tensor([1])]; string block_2_ffn_inproj_pad_type_0 = const()[name = string("block_2_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_2_ffn_inproj_pad_0 = const()[name = string("block_2_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_2_ffn_inproj_dilations_0 = const()[name = string("block_2_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_2_ffn_inproj_groups_0 = const()[name = string("block_2_ffn_inproj_groups_0"), val = int32(1)]; tensor block_2_ffn_inproj = conv(dilations = block_2_ffn_inproj_dilations_0, groups = block_2_ffn_inproj_groups_0, pad = block_2_ffn_inproj_pad_0, pad_type = block_2_ffn_inproj_pad_type_0, strides = block_2_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_7, x = block_2_ffn_rmsnorm)[name = string("block_2_ffn_inproj")]; tensor block_2_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319256768))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322525440))))[name = string("block_2_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_8 = constexpr_blockwise_shift_scale(data = block_2_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322681152))))[name = string("constexpr_blockwise_shift_scale_8")]; tensor block_2_ffn_g_strides_0 = const()[name = string("block_2_ffn_g_strides_0"), val = tensor([1])]; string block_2_ffn_g_pad_type_0 = const()[name = string("block_2_ffn_g_pad_type_0"), val = string("valid")]; tensor block_2_ffn_g_pad_0 = const()[name = string("block_2_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_2_ffn_g_dilations_0 = const()[name = string("block_2_ffn_g_dilations_0"), val = tensor([1])]; int32 block_2_ffn_g_groups_0 = const()[name = string("block_2_ffn_g_groups_0"), val = int32(1)]; tensor block_2_ffn_g = conv(dilations = block_2_ffn_g_dilations_0, groups = block_2_ffn_g_groups_0, pad = block_2_ffn_g_pad_0, pad_type = block_2_ffn_g_pad_type_0, strides = block_2_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_8, x = block_2_ffn_rmsnorm)[name = string("block_2_ffn_g")]; tensor block_2_ffn_g_activation = silu(x = block_2_ffn_g)[name = string("block_2_ffn_g_activation")]; tensor block_2_ffn_x_gated = mul(x = block_2_ffn_inproj, y = block_2_ffn_g_activation)[name = string("block_2_ffn_x_gated")]; tensor block_2_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322690944))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325959616))))[name = string("block_2_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_9 = constexpr_blockwise_shift_scale(data = block_2_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325988352))))[name = string("constexpr_blockwise_shift_scale_9")]; tensor block_2_ffn_outproj_strides_0 = const()[name = string("block_2_ffn_outproj_strides_0"), val = tensor([1])]; string block_2_ffn_outproj_pad_type_0 = const()[name = 
string("block_2_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_2_ffn_outproj_pad_0 = const()[name = string("block_2_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_2_ffn_outproj_dilations_0 = const()[name = string("block_2_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_2_ffn_outproj_groups_0 = const()[name = string("block_2_ffn_outproj_groups_0"), val = int32(1)]; tensor block_2_ffn_outproj = conv(dilations = block_2_ffn_outproj_dilations_0, groups = block_2_ffn_outproj_groups_0, pad = block_2_ffn_outproj_pad_0, pad_type = block_2_ffn_outproj_pad_type_0, strides = block_2_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_9, x = block_2_ffn_x_gated)[name = string("block_2_ffn_outproj")]; tensor block_2_residual_2 = add(x = block_2_ffn_outproj, y = block_2_residual_1)[name = string("block_2_residual_2")]; tensor block_3_attention_rmsnorm_abs = abs(x = block_2_residual_2)[name = string("block_3_attention_rmsnorm_abs")]; tensor block_3_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_3_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_3_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_3_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_3_attention_rmsnorm_maxval = reduce_max(axes = block_3_attention_rmsnorm_maxval_axes_0, keep_dims = block_3_attention_rmsnorm_maxval_keep_dims_0, x = block_3_attention_rmsnorm_abs)[name = string("block_3_attention_rmsnorm_maxval")]; fp16 block_3_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_3_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_3_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_3_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_3_attention_rmsnorm_maxval_clipped = clip(alpha = block_3_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_3_attention_rmsnorm_maxval_clipped_beta_0, x = block_3_attention_rmsnorm_maxval)[name = string("block_3_attention_rmsnorm_maxval_clipped")]; tensor block_3_attention_rmsnorm_scaled = real_div(x = block_2_residual_2, y = block_3_attention_rmsnorm_maxval_clipped)[name = string("block_3_attention_rmsnorm_scaled")]; tensor block_3_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_3_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_3_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_3_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_3_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_3_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_3_attention_rmsnorm_squared_sum_keep_dims_0, x = block_3_attention_rmsnorm_scaled)[name = string("block_3_attention_rmsnorm_squared_sum")]; fp16 block_3_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_3_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_3_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_3_attention_rmsnorm_rsqrt_epsilon_0, x = block_3_attention_rmsnorm_squared_sum)[name = string("block_3_attention_rmsnorm_rsqrt")]; fp16 block_3_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_3_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_3_attention_rmsnorm_dim_scaled = mul(x = block_3_attention_rmsnorm_scaled, y = block_3_attention_rmsnorm_dim_scaled_y_0)[name = string("block_3_attention_rmsnorm_dim_scaled")]; tensor block_3_attention_rmsnorm_normalized = mul(x = 
block_3_attention_rmsnorm_dim_scaled, y = block_3_attention_rmsnorm_rsqrt)[name = string("block_3_attention_rmsnorm_normalized")]; tensor block_3_attention_rmsnorm_y_0 = const()[name = string("block_3_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325990208)))]; tensor block_3_attention_rmsnorm = mul(x = block_3_attention_rmsnorm_normalized, y = block_3_attention_rmsnorm_y_0)[name = string("block_3_attention_rmsnorm")]; tensor attention_3_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325992064))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326766272))))[name = string("attention_3_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_10 = constexpr_blockwise_shift_scale(data = attention_3_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326803200))))[name = string("constexpr_blockwise_shift_scale_10")]; tensor attention_3_qkvproj_bias_0 = const()[name = string("attention_3_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326805568)))]; tensor attention_3_qkvproj_strides_0 = const()[name = string("attention_3_qkvproj_strides_0"), val = tensor([1])]; string attention_3_qkvproj_pad_type_0 = const()[name = string("attention_3_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_3_qkvproj_pad_0 = const()[name = string("attention_3_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_3_qkvproj_dilations_0 = const()[name = string("attention_3_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_3_qkvproj_groups_0 = const()[name = string("attention_3_qkvproj_groups_0"), val = int32(1)]; tensor attention_3_qkvproj = conv(bias = attention_3_qkvproj_bias_0, dilations = attention_3_qkvproj_dilations_0, groups = attention_3_qkvproj_groups_0, pad = attention_3_qkvproj_pad_0, pad_type = attention_3_qkvproj_pad_type_0, strides = attention_3_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_10, x = block_3_attention_rmsnorm)[name = string("attention_3_qkvproj")]; tensor attention_3_head_reshape_shape_0 = const()[name = string("attention_3_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_3_head_reshape = reshape(shape = attention_3_head_reshape_shape_0, x = attention_3_qkvproj)[name = string("attention_3_head_reshape")]; tensor attention_3_head_transpose_perm_0 = const()[name = string("attention_3_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_3_split_qkv_heads_axis_0 = const()[name = string("attention_3_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_3_split_qkv_heads_split_sizes_0 = const()[name = string("attention_3_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_3_head_transpose = transpose(perm = attention_3_head_transpose_perm_0, x = attention_3_head_reshape)[name = string("transpose_42")]; tensor attention_3_split_qkv_heads_0, tensor attention_3_split_qkv_heads_1, tensor attention_3_split_qkv_heads_2 = split(axis = attention_3_split_qkv_heads_axis_0, split_sizes = attention_3_split_qkv_heads_split_sizes_0, x = attention_3_head_transpose)[name = string("attention_3_split_qkv_heads")]; tensor attention_3_q_rope_lhs_mult = mul(x = attention_3_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_3_q_rope_lhs_mult")]; int32 
attention_3_q_rotate_half_split_num_splits_0 = const()[name = string("attention_3_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_3_q_rotate_half_split_axis_0 = const()[name = string("attention_3_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_3_q_rotate_half_split_0, tensor attention_3_q_rotate_half_split_1 = split(axis = attention_3_q_rotate_half_split_axis_0, num_splits = attention_3_q_rotate_half_split_num_splits_0, x = attention_3_split_qkv_heads_0)[name = string("attention_3_q_rotate_half_split")]; fp16 attention_3_q_rotate_half_neg_y_0 = const()[name = string("attention_3_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_3_q_rotate_half_neg = mul(x = attention_3_q_rotate_half_split_1, y = attention_3_q_rotate_half_neg_y_0)[name = string("attention_3_q_rotate_half_neg")]; int32 attention_3_q_rotate_half_concat_axis_0 = const()[name = string("attention_3_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_3_q_rotate_half_concat_interleave_0 = const()[name = string("attention_3_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_3_q_rotate_half_concat = concat(axis = attention_3_q_rotate_half_concat_axis_0, interleave = attention_3_q_rotate_half_concat_interleave_0, values = (attention_3_q_rotate_half_neg, attention_3_q_rotate_half_split_0))[name = string("attention_3_q_rotate_half_concat")]; tensor attention_3_q_rope_rhs_mult = mul(x = attention_3_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_3_q_rope_rhs_mult")]; tensor attention_3_q_rope = add(x = attention_3_q_rope_lhs_mult, y = attention_3_q_rope_rhs_mult)[name = string("attention_3_q_rope")]; tensor attention_3_k_rope_lhs_mult = mul(x = attention_3_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_3_k_rope_lhs_mult")]; int32 attention_3_k_rotate_half_split_num_splits_0 = const()[name = string("attention_3_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_3_k_rotate_half_split_axis_0 = const()[name = string("attention_3_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_3_k_rotate_half_split_0, tensor attention_3_k_rotate_half_split_1 = split(axis = attention_3_k_rotate_half_split_axis_0, num_splits = attention_3_k_rotate_half_split_num_splits_0, x = attention_3_split_qkv_heads_1)[name = string("attention_3_k_rotate_half_split")]; fp16 attention_3_k_rotate_half_neg_y_0 = const()[name = string("attention_3_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_3_k_rotate_half_neg = mul(x = attention_3_k_rotate_half_split_1, y = attention_3_k_rotate_half_neg_y_0)[name = string("attention_3_k_rotate_half_neg")]; int32 attention_3_k_rotate_half_concat_axis_0 = const()[name = string("attention_3_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_3_k_rotate_half_concat_interleave_0 = const()[name = string("attention_3_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_3_k_rotate_half_concat = concat(axis = attention_3_k_rotate_half_concat_axis_0, interleave = attention_3_k_rotate_half_concat_interleave_0, values = (attention_3_k_rotate_half_neg, attention_3_k_rotate_half_split_0))[name = string("attention_3_k_rotate_half_concat")]; tensor attention_3_k_rope_rhs_mult = mul(x = attention_3_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_3_k_rope_rhs_mult")]; tensor attention_3_k_rope = add(x = attention_3_k_rope_lhs_mult, y = attention_3_k_rope_rhs_mult)[name = string("attention_3_k_rope")]; int32 attention_3_q_splits_axis_0 = 
const()[name = string("attention_3_q_splits_axis_0"), val = int32(1)]; int32 attention_3_q_splits_num_splits_0 = const()[name = string("attention_3_q_splits_num_splits_0"), val = int32(2)]; tensor attention_3_q_splits_0, tensor attention_3_q_splits_1 = split(axis = attention_3_q_splits_axis_0, num_splits = attention_3_q_splits_num_splits_0, x = attention_3_q_rope)[name = string("attention_3_q_splits")]; tensor attention_3_update_begin_0_values0_0 = const()[name = string("attention_3_update_begin_0_values0_0"), val = tensor([3])]; tensor attention_3_update_begin_0_values1_0 = const()[name = string("attention_3_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_3_update_begin_0_values3_0 = const()[name = string("attention_3_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_3_update_begin_0_axis_0 = const()[name = string("attention_3_update_begin_0_axis_0"), val = int32(0)]; bool attention_3_update_begin_0_interleave_0 = const()[name = string("attention_3_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_3_update_begin_0 = concat(axis = attention_3_update_begin_0_axis_0, interleave = attention_3_update_begin_0_interleave_0, values = (attention_3_update_begin_0_values0_0, attention_3_update_begin_0_values1_0, query_pos1, attention_3_update_begin_0_values3_0))[name = string("attention_3_update_begin_0")]; tensor attention_3_update_end_0_values0_0 = const()[name = string("attention_3_update_end_0_values0_0"), val = tensor([4])]; tensor attention_3_update_end_0_values1_0 = const()[name = string("attention_3_update_end_0_values1_0"), val = tensor([2])]; tensor attention_3_update_end_0_values3_0 = const()[name = string("attention_3_update_end_0_values3_0"), val = tensor([64])]; int32 attention_3_update_end_0_axis_0 = const()[name = string("attention_3_update_end_0_axis_0"), val = int32(0)]; bool attention_3_update_end_0_interleave_0 = const()[name = string("attention_3_update_end_0_interleave_0"), val = bool(false)]; tensor attention_3_update_end_0 = concat(axis = attention_3_update_end_0_axis_0, interleave = attention_3_update_end_0_interleave_0, values = (attention_3_update_end_0_values0_0, attention_3_update_end_0_values1_0, end_pos_0, attention_3_update_end_0_values3_0))[name = string("attention_3_update_end_0")]; tensor attention_3_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_3_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_3_updated_key_cache_0 = slice_update(begin = attention_3_update_begin_0, end = attention_3_update_end_0, squeeze_mask = attention_3_updated_key_cache_0_squeeze_mask_0, update = attention_3_k_rope, x = coreml_update_state_4)[name = string("attention_3_updated_key_cache_0")]; write_state(data = attention_3_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_54_write_state")]; tensor coreml_update_state_6 = read_state(input = key_cache_state)[name = string("coreml_update_state_54")]; tensor attention_3_key_cache_begin_0 = const()[name = string("attention_3_key_cache_begin_0"), val = tensor([3, 0, 0, 0])]; tensor attention_3_key_cache_end_0 = const()[name = string("attention_3_key_cache_end_0"), val = tensor([4, 2, 512, 64])]; tensor attention_3_key_cache_squeeze_mask_0 = const()[name = string("attention_3_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_3_key_cache = slice_by_index(begin = attention_3_key_cache_begin_0, end = attention_3_key_cache_end_0, squeeze_mask = 
attention_3_key_cache_squeeze_mask_0, x = coreml_update_state_6)[name = string("attention_3_key_cache")]; int32 attention_3_key_cache_head_axis_0 = const()[name = string("attention_3_key_cache_head_axis_0"), val = int32(1)]; int32 attention_3_key_cache_head_num_splits_0 = const()[name = string("attention_3_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_3_key_cache_head_0, tensor attention_3_key_cache_head_1 = split(axis = attention_3_key_cache_head_axis_0, num_splits = attention_3_key_cache_head_num_splits_0, x = attention_3_key_cache)[name = string("attention_3_key_cache_head")]; tensor attention_3_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_3_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_3_updated_value_cache_0 = slice_update(begin = attention_3_update_begin_0, end = attention_3_update_end_0, squeeze_mask = attention_3_updated_value_cache_0_squeeze_mask_0, update = attention_3_split_qkv_heads_2, x = coreml_update_state_5)[name = string("attention_3_updated_value_cache_0")]; write_state(data = attention_3_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_55_write_state")]; tensor coreml_update_state_7 = read_state(input = value_cache_state)[name = string("coreml_update_state_55")]; tensor attention_3_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_3_slice_current_layer_value_cache_begin_0"), val = tensor([3, 0, 0, 0])]; tensor attention_3_slice_current_layer_value_cache_end_0 = const()[name = string("attention_3_slice_current_layer_value_cache_end_0"), val = tensor([4, 2, 512, 64])]; tensor attention_3_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_3_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_3_slice_current_layer_value_cache = slice_by_index(begin = attention_3_slice_current_layer_value_cache_begin_0, end = attention_3_slice_current_layer_value_cache_end_0, squeeze_mask = attention_3_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_7)[name = string("attention_3_slice_current_layer_value_cache")]; int32 attention_3_slice_value_cache_heads_axis_0 = const()[name = string("attention_3_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_3_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_3_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_3_slice_value_cache_heads_0, tensor attention_3_slice_value_cache_heads_1 = split(axis = attention_3_slice_value_cache_heads_axis_0, num_splits = attention_3_slice_value_cache_heads_num_splits_0, x = attention_3_slice_current_layer_value_cache)[name = string("attention_3_slice_value_cache_heads")]; bool attention_3_scores_0_transpose_y_0 = const()[name = string("attention_3_scores_0_transpose_y_0"), val = bool(true)]; bool attention_3_scores_0_transpose_x_0 = const()[name = string("attention_3_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_3_scores_0 = matmul(transpose_x = attention_3_scores_0_transpose_x_0, transpose_y = attention_3_scores_0_transpose_y_0, x = attention_3_key_cache_head_0, y = attention_3_q_splits_0)[name = string("attention_3_scores_0")]; fp16 attention_3_scaled_scores_0_y_0 = const()[name = string("attention_3_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_3_scaled_scores_0 = mul(x = attention_3_scores_0, y = attention_3_scaled_scores_0_y_0)[name = 
string("attention_3_scaled_scores_0")]; tensor attention_3_masked_scaled_scores_0 = add(x = attention_3_scaled_scores_0, y = transpose_0)[name = string("attention_3_masked_scaled_scores_0")]; int32 softmax_6_axis_0 = const()[name = string("softmax_6_axis_0"), val = int32(-2)]; tensor softmax_6 = softmax(axis = softmax_6_axis_0, x = attention_3_masked_scaled_scores_0)[name = string("softmax_6")]; bool attention_3_attention_0_transpose_x_0 = const()[name = string("attention_3_attention_0_transpose_x_0"), val = bool(true)]; bool attention_3_attention_0_transpose_y_0 = const()[name = string("attention_3_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_3_attention_0 = matmul(transpose_x = attention_3_attention_0_transpose_x_0, transpose_y = attention_3_attention_0_transpose_y_0, x = softmax_6, y = attention_3_slice_value_cache_heads_0)[name = string("attention_3_attention_0")]; bool attention_3_scores_1_transpose_y_0 = const()[name = string("attention_3_scores_1_transpose_y_0"), val = bool(true)]; bool attention_3_scores_1_transpose_x_0 = const()[name = string("attention_3_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_3_scores_1 = matmul(transpose_x = attention_3_scores_1_transpose_x_0, transpose_y = attention_3_scores_1_transpose_y_0, x = attention_3_key_cache_head_1, y = attention_3_q_splits_1)[name = string("attention_3_scores_1")]; fp16 attention_3_scaled_scores_1_y_0 = const()[name = string("attention_3_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_3_scaled_scores_1 = mul(x = attention_3_scores_1, y = attention_3_scaled_scores_1_y_0)[name = string("attention_3_scaled_scores_1")]; tensor attention_3_masked_scaled_scores_1 = add(x = attention_3_scaled_scores_1, y = transpose_0)[name = string("attention_3_masked_scaled_scores_1")]; int32 softmax_7_axis_0 = const()[name = string("softmax_7_axis_0"), val = int32(-2)]; tensor softmax_7 = softmax(axis = softmax_7_axis_0, x = attention_3_masked_scaled_scores_1)[name = string("softmax_7")]; bool attention_3_attention_1_transpose_x_0 = const()[name = string("attention_3_attention_1_transpose_x_0"), val = bool(true)]; bool attention_3_attention_1_transpose_y_0 = const()[name = string("attention_3_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_3_attention_1 = matmul(transpose_x = attention_3_attention_1_transpose_x_0, transpose_y = attention_3_attention_1_transpose_y_0, x = softmax_7, y = attention_3_slice_value_cache_heads_1)[name = string("attention_3_attention_1")]; int32 attention_3_concat_attention_all_heads_axis_0 = const()[name = string("attention_3_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_3_concat_attention_all_heads_interleave_0 = const()[name = string("attention_3_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_3_concat_attention_all_heads = concat(axis = attention_3_concat_attention_all_heads_axis_0, interleave = attention_3_concat_attention_all_heads_interleave_0, values = (attention_3_attention_0, attention_3_attention_1))[name = string("attention_3_concat_attention_all_heads")]; tensor attention_3_channels_first_retransposed_perm_0 = const()[name = string("attention_3_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_3_reshaped_shape_0 = const()[name = string("attention_3_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_3_channels_first_retransposed = transpose(perm = attention_3_channels_first_retransposed_perm_0, x = 
attention_3_concat_attention_all_heads)[name = string("transpose_41")]; tensor attention_3_reshaped = reshape(shape = attention_3_reshaped_shape_0, x = attention_3_channels_first_retransposed)[name = string("attention_3_reshaped")]; tensor attention_3_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326807936))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327410112))))[name = string("attention_3_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_11 = constexpr_blockwise_shift_scale(data = attention_3_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327438848))))[name = string("constexpr_blockwise_shift_scale_11")]; tensor attention_3_outproj_strides_0 = const()[name = string("attention_3_outproj_strides_0"), val = tensor([1])]; string attention_3_outproj_pad_type_0 = const()[name = string("attention_3_outproj_pad_type_0"), val = string("valid")]; tensor attention_3_outproj_pad_0 = const()[name = string("attention_3_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_3_outproj_dilations_0 = const()[name = string("attention_3_outproj_dilations_0"), val = tensor([1])]; int32 attention_3_outproj_groups_0 = const()[name = string("attention_3_outproj_groups_0"), val = int32(1)]; tensor attention_3_outproj = conv(dilations = attention_3_outproj_dilations_0, groups = attention_3_outproj_groups_0, pad = attention_3_outproj_pad_0, pad_type = attention_3_outproj_pad_type_0, strides = attention_3_outproj_strides_0, weight = constexpr_blockwise_shift_scale_11, x = attention_3_reshaped)[name = string("attention_3_outproj")]; tensor block_3_residual_1 = add(x = block_2_residual_2, y = attention_3_outproj)[name = string("block_3_residual_1")]; tensor block_3_ffn_rmsnorm_abs = abs(x = block_3_residual_1)[name = string("block_3_ffn_rmsnorm_abs")]; tensor block_3_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_3_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_3_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_3_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_3_ffn_rmsnorm_maxval = reduce_max(axes = block_3_ffn_rmsnorm_maxval_axes_0, keep_dims = block_3_ffn_rmsnorm_maxval_keep_dims_0, x = block_3_ffn_rmsnorm_abs)[name = string("block_3_ffn_rmsnorm_maxval")]; fp16 block_3_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_3_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_3_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_3_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_3_ffn_rmsnorm_maxval_clipped = clip(alpha = block_3_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_3_ffn_rmsnorm_maxval_clipped_beta_0, x = block_3_ffn_rmsnorm_maxval)[name = string("block_3_ffn_rmsnorm_maxval_clipped")]; tensor block_3_ffn_rmsnorm_scaled = real_div(x = block_3_residual_1, y = block_3_ffn_rmsnorm_maxval_clipped)[name = string("block_3_ffn_rmsnorm_scaled")]; tensor block_3_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_3_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_3_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_3_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_3_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_3_ffn_rmsnorm_squared_sum_axes_0, keep_dims = 
block_3_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_3_ffn_rmsnorm_scaled)[name = string("block_3_ffn_rmsnorm_squared_sum")]; fp16 block_3_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_3_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_3_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_3_ffn_rmsnorm_rsqrt_epsilon_0, x = block_3_ffn_rmsnorm_squared_sum)[name = string("block_3_ffn_rmsnorm_rsqrt")]; fp16 block_3_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_3_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_3_ffn_rmsnorm_dim_scaled = mul(x = block_3_ffn_rmsnorm_scaled, y = block_3_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_3_ffn_rmsnorm_dim_scaled")]; tensor block_3_ffn_rmsnorm_normalized = mul(x = block_3_ffn_rmsnorm_dim_scaled, y = block_3_ffn_rmsnorm_rsqrt)[name = string("block_3_ffn_rmsnorm_normalized")]; tensor block_3_ffn_rmsnorm_y_0 = const()[name = string("block_3_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327440704)))]; tensor block_3_ffn_rmsnorm = mul(x = block_3_ffn_rmsnorm_normalized, y = block_3_ffn_rmsnorm_y_0)[name = string("block_3_ffn_rmsnorm")]; tensor block_3_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(327442560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330711232))))[name = string("block_3_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_12 = constexpr_blockwise_shift_scale(data = block_3_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330866944))))[name = string("constexpr_blockwise_shift_scale_12")]; tensor block_3_ffn_inproj_strides_0 = const()[name = string("block_3_ffn_inproj_strides_0"), val = tensor([1])]; string block_3_ffn_inproj_pad_type_0 = const()[name = string("block_3_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_3_ffn_inproj_pad_0 = const()[name = string("block_3_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_3_ffn_inproj_dilations_0 = const()[name = string("block_3_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_3_ffn_inproj_groups_0 = const()[name = string("block_3_ffn_inproj_groups_0"), val = int32(1)]; tensor block_3_ffn_inproj = conv(dilations = block_3_ffn_inproj_dilations_0, groups = block_3_ffn_inproj_groups_0, pad = block_3_ffn_inproj_pad_0, pad_type = block_3_ffn_inproj_pad_type_0, strides = block_3_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_12, x = block_3_ffn_rmsnorm)[name = string("block_3_ffn_inproj")]; tensor block_3_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330876736))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334145408))))[name = string("block_3_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_13 = constexpr_blockwise_shift_scale(data = block_3_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334301120))))[name = string("constexpr_blockwise_shift_scale_13")]; tensor block_3_ffn_g_strides_0 = const()[name = string("block_3_ffn_g_strides_0"), val = tensor([1])]; string block_3_ffn_g_pad_type_0 = const()[name = string("block_3_ffn_g_pad_type_0"), val = string("valid")]; tensor 
block_3_ffn_g_pad_0 = const()[name = string("block_3_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_3_ffn_g_dilations_0 = const()[name = string("block_3_ffn_g_dilations_0"), val = tensor([1])]; int32 block_3_ffn_g_groups_0 = const()[name = string("block_3_ffn_g_groups_0"), val = int32(1)]; tensor block_3_ffn_g = conv(dilations = block_3_ffn_g_dilations_0, groups = block_3_ffn_g_groups_0, pad = block_3_ffn_g_pad_0, pad_type = block_3_ffn_g_pad_type_0, strides = block_3_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_13, x = block_3_ffn_rmsnorm)[name = string("block_3_ffn_g")]; tensor block_3_ffn_g_activation = silu(x = block_3_ffn_g)[name = string("block_3_ffn_g_activation")]; tensor block_3_ffn_x_gated = mul(x = block_3_ffn_inproj, y = block_3_ffn_g_activation)[name = string("block_3_ffn_x_gated")]; tensor block_3_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334310912))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337579584))))[name = string("block_3_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_14 = constexpr_blockwise_shift_scale(data = block_3_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337608320))))[name = string("constexpr_blockwise_shift_scale_14")]; tensor block_3_ffn_outproj_strides_0 = const()[name = string("block_3_ffn_outproj_strides_0"), val = tensor([1])]; string block_3_ffn_outproj_pad_type_0 = const()[name = string("block_3_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_3_ffn_outproj_pad_0 = const()[name = string("block_3_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_3_ffn_outproj_dilations_0 = const()[name = string("block_3_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_3_ffn_outproj_groups_0 = const()[name = string("block_3_ffn_outproj_groups_0"), val = int32(1)]; tensor block_3_ffn_outproj = conv(dilations = block_3_ffn_outproj_dilations_0, groups = block_3_ffn_outproj_groups_0, pad = block_3_ffn_outproj_pad_0, pad_type = block_3_ffn_outproj_pad_type_0, strides = block_3_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_14, x = block_3_ffn_x_gated)[name = string("block_3_ffn_outproj")]; tensor block_3_residual_2 = add(x = block_3_ffn_outproj, y = block_3_residual_1)[name = string("block_3_residual_2")]; tensor block_4_attention_rmsnorm_abs = abs(x = block_3_residual_2)[name = string("block_4_attention_rmsnorm_abs")]; tensor block_4_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_4_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_4_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_4_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_4_attention_rmsnorm_maxval = reduce_max(axes = block_4_attention_rmsnorm_maxval_axes_0, keep_dims = block_4_attention_rmsnorm_maxval_keep_dims_0, x = block_4_attention_rmsnorm_abs)[name = string("block_4_attention_rmsnorm_maxval")]; fp16 block_4_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_4_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_4_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_4_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_4_attention_rmsnorm_maxval_clipped = clip(alpha = block_4_attention_rmsnorm_maxval_clipped_alpha_0, beta = 
block_4_attention_rmsnorm_maxval_clipped_beta_0, x = block_4_attention_rmsnorm_maxval)[name = string("block_4_attention_rmsnorm_maxval_clipped")]; tensor block_4_attention_rmsnorm_scaled = real_div(x = block_3_residual_2, y = block_4_attention_rmsnorm_maxval_clipped)[name = string("block_4_attention_rmsnorm_scaled")]; tensor block_4_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_4_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_4_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_4_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_4_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_4_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_4_attention_rmsnorm_squared_sum_keep_dims_0, x = block_4_attention_rmsnorm_scaled)[name = string("block_4_attention_rmsnorm_squared_sum")]; fp16 block_4_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_4_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_4_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_4_attention_rmsnorm_rsqrt_epsilon_0, x = block_4_attention_rmsnorm_squared_sum)[name = string("block_4_attention_rmsnorm_rsqrt")]; fp16 block_4_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_4_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_4_attention_rmsnorm_dim_scaled = mul(x = block_4_attention_rmsnorm_scaled, y = block_4_attention_rmsnorm_dim_scaled_y_0)[name = string("block_4_attention_rmsnorm_dim_scaled")]; tensor block_4_attention_rmsnorm_normalized = mul(x = block_4_attention_rmsnorm_dim_scaled, y = block_4_attention_rmsnorm_rsqrt)[name = string("block_4_attention_rmsnorm_normalized")]; tensor block_4_attention_rmsnorm_y_0 = const()[name = string("block_4_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337610176)))]; tensor block_4_attention_rmsnorm = mul(x = block_4_attention_rmsnorm_normalized, y = block_4_attention_rmsnorm_y_0)[name = string("block_4_attention_rmsnorm")]; tensor attention_4_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337612032))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338386240))))[name = string("attention_4_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_15 = constexpr_blockwise_shift_scale(data = attention_4_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338423168))))[name = string("constexpr_blockwise_shift_scale_15")]; tensor attention_4_qkvproj_bias_0 = const()[name = string("attention_4_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338425536)))]; tensor attention_4_qkvproj_strides_0 = const()[name = string("attention_4_qkvproj_strides_0"), val = tensor([1])]; string attention_4_qkvproj_pad_type_0 = const()[name = string("attention_4_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_4_qkvproj_pad_0 = const()[name = string("attention_4_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_4_qkvproj_dilations_0 = const()[name = string("attention_4_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_4_qkvproj_groups_0 = const()[name = string("attention_4_qkvproj_groups_0"), val = int32(1)]; tensor attention_4_qkvproj = conv(bias = 
attention_4_qkvproj_bias_0, dilations = attention_4_qkvproj_dilations_0, groups = attention_4_qkvproj_groups_0, pad = attention_4_qkvproj_pad_0, pad_type = attention_4_qkvproj_pad_type_0, strides = attention_4_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_15, x = block_4_attention_rmsnorm)[name = string("attention_4_qkvproj")]; tensor attention_4_head_reshape_shape_0 = const()[name = string("attention_4_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_4_head_reshape = reshape(shape = attention_4_head_reshape_shape_0, x = attention_4_qkvproj)[name = string("attention_4_head_reshape")]; tensor attention_4_head_transpose_perm_0 = const()[name = string("attention_4_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_4_split_qkv_heads_axis_0 = const()[name = string("attention_4_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_4_split_qkv_heads_split_sizes_0 = const()[name = string("attention_4_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_4_head_transpose = transpose(perm = attention_4_head_transpose_perm_0, x = attention_4_head_reshape)[name = string("transpose_40")]; tensor attention_4_split_qkv_heads_0, tensor attention_4_split_qkv_heads_1, tensor attention_4_split_qkv_heads_2 = split(axis = attention_4_split_qkv_heads_axis_0, split_sizes = attention_4_split_qkv_heads_split_sizes_0, x = attention_4_head_transpose)[name = string("attention_4_split_qkv_heads")]; tensor attention_4_q_rope_lhs_mult = mul(x = attention_4_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_4_q_rope_lhs_mult")]; int32 attention_4_q_rotate_half_split_num_splits_0 = const()[name = string("attention_4_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_4_q_rotate_half_split_axis_0 = const()[name = string("attention_4_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_4_q_rotate_half_split_0, tensor attention_4_q_rotate_half_split_1 = split(axis = attention_4_q_rotate_half_split_axis_0, num_splits = attention_4_q_rotate_half_split_num_splits_0, x = attention_4_split_qkv_heads_0)[name = string("attention_4_q_rotate_half_split")]; fp16 attention_4_q_rotate_half_neg_y_0 = const()[name = string("attention_4_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_4_q_rotate_half_neg = mul(x = attention_4_q_rotate_half_split_1, y = attention_4_q_rotate_half_neg_y_0)[name = string("attention_4_q_rotate_half_neg")]; int32 attention_4_q_rotate_half_concat_axis_0 = const()[name = string("attention_4_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_4_q_rotate_half_concat_interleave_0 = const()[name = string("attention_4_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_4_q_rotate_half_concat = concat(axis = attention_4_q_rotate_half_concat_axis_0, interleave = attention_4_q_rotate_half_concat_interleave_0, values = (attention_4_q_rotate_half_neg, attention_4_q_rotate_half_split_0))[name = string("attention_4_q_rotate_half_concat")]; tensor attention_4_q_rope_rhs_mult = mul(x = attention_4_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_4_q_rope_rhs_mult")]; tensor attention_4_q_rope = add(x = attention_4_q_rope_lhs_mult, y = attention_4_q_rope_rhs_mult)[name = string("attention_4_q_rope")]; tensor attention_4_k_rope_lhs_mult = mul(x = attention_4_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_4_k_rope_lhs_mult")]; int32 attention_4_k_rotate_half_split_num_splits_0 = const()[name = 
string("attention_4_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_4_k_rotate_half_split_axis_0 = const()[name = string("attention_4_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_4_k_rotate_half_split_0, tensor attention_4_k_rotate_half_split_1 = split(axis = attention_4_k_rotate_half_split_axis_0, num_splits = attention_4_k_rotate_half_split_num_splits_0, x = attention_4_split_qkv_heads_1)[name = string("attention_4_k_rotate_half_split")]; fp16 attention_4_k_rotate_half_neg_y_0 = const()[name = string("attention_4_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_4_k_rotate_half_neg = mul(x = attention_4_k_rotate_half_split_1, y = attention_4_k_rotate_half_neg_y_0)[name = string("attention_4_k_rotate_half_neg")]; int32 attention_4_k_rotate_half_concat_axis_0 = const()[name = string("attention_4_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_4_k_rotate_half_concat_interleave_0 = const()[name = string("attention_4_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_4_k_rotate_half_concat = concat(axis = attention_4_k_rotate_half_concat_axis_0, interleave = attention_4_k_rotate_half_concat_interleave_0, values = (attention_4_k_rotate_half_neg, attention_4_k_rotate_half_split_0))[name = string("attention_4_k_rotate_half_concat")]; tensor attention_4_k_rope_rhs_mult = mul(x = attention_4_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_4_k_rope_rhs_mult")]; tensor attention_4_k_rope = add(x = attention_4_k_rope_lhs_mult, y = attention_4_k_rope_rhs_mult)[name = string("attention_4_k_rope")]; int32 attention_4_q_splits_axis_0 = const()[name = string("attention_4_q_splits_axis_0"), val = int32(1)]; int32 attention_4_q_splits_num_splits_0 = const()[name = string("attention_4_q_splits_num_splits_0"), val = int32(2)]; tensor attention_4_q_splits_0, tensor attention_4_q_splits_1 = split(axis = attention_4_q_splits_axis_0, num_splits = attention_4_q_splits_num_splits_0, x = attention_4_q_rope)[name = string("attention_4_q_splits")]; tensor attention_4_update_begin_0_values0_0 = const()[name = string("attention_4_update_begin_0_values0_0"), val = tensor([4])]; tensor attention_4_update_begin_0_values1_0 = const()[name = string("attention_4_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_4_update_begin_0_values3_0 = const()[name = string("attention_4_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_4_update_begin_0_axis_0 = const()[name = string("attention_4_update_begin_0_axis_0"), val = int32(0)]; bool attention_4_update_begin_0_interleave_0 = const()[name = string("attention_4_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_4_update_begin_0 = concat(axis = attention_4_update_begin_0_axis_0, interleave = attention_4_update_begin_0_interleave_0, values = (attention_4_update_begin_0_values0_0, attention_4_update_begin_0_values1_0, query_pos1, attention_4_update_begin_0_values3_0))[name = string("attention_4_update_begin_0")]; tensor attention_4_update_end_0_values0_0 = const()[name = string("attention_4_update_end_0_values0_0"), val = tensor([5])]; tensor attention_4_update_end_0_values1_0 = const()[name = string("attention_4_update_end_0_values1_0"), val = tensor([2])]; tensor attention_4_update_end_0_values3_0 = const()[name = string("attention_4_update_end_0_values3_0"), val = tensor([64])]; int32 attention_4_update_end_0_axis_0 = const()[name = string("attention_4_update_end_0_axis_0"), val = int32(0)]; bool 
attention_4_update_end_0_interleave_0 = const()[name = string("attention_4_update_end_0_interleave_0"), val = bool(false)]; tensor attention_4_update_end_0 = concat(axis = attention_4_update_end_0_axis_0, interleave = attention_4_update_end_0_interleave_0, values = (attention_4_update_end_0_values0_0, attention_4_update_end_0_values1_0, end_pos_0, attention_4_update_end_0_values3_0))[name = string("attention_4_update_end_0")]; tensor attention_4_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_4_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_4_updated_key_cache_0 = slice_update(begin = attention_4_update_begin_0, end = attention_4_update_end_0, squeeze_mask = attention_4_updated_key_cache_0_squeeze_mask_0, update = attention_4_k_rope, x = coreml_update_state_6)[name = string("attention_4_updated_key_cache_0")]; write_state(data = attention_4_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_56_write_state")]; tensor coreml_update_state_8 = read_state(input = key_cache_state)[name = string("coreml_update_state_56")]; tensor attention_4_key_cache_begin_0 = const()[name = string("attention_4_key_cache_begin_0"), val = tensor([4, 0, 0, 0])]; tensor attention_4_key_cache_end_0 = const()[name = string("attention_4_key_cache_end_0"), val = tensor([5, 2, 512, 64])]; tensor attention_4_key_cache_squeeze_mask_0 = const()[name = string("attention_4_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_4_key_cache = slice_by_index(begin = attention_4_key_cache_begin_0, end = attention_4_key_cache_end_0, squeeze_mask = attention_4_key_cache_squeeze_mask_0, x = coreml_update_state_8)[name = string("attention_4_key_cache")]; int32 attention_4_key_cache_head_axis_0 = const()[name = string("attention_4_key_cache_head_axis_0"), val = int32(1)]; int32 attention_4_key_cache_head_num_splits_0 = const()[name = string("attention_4_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_4_key_cache_head_0, tensor attention_4_key_cache_head_1 = split(axis = attention_4_key_cache_head_axis_0, num_splits = attention_4_key_cache_head_num_splits_0, x = attention_4_key_cache)[name = string("attention_4_key_cache_head")]; tensor attention_4_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_4_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_4_updated_value_cache_0 = slice_update(begin = attention_4_update_begin_0, end = attention_4_update_end_0, squeeze_mask = attention_4_updated_value_cache_0_squeeze_mask_0, update = attention_4_split_qkv_heads_2, x = coreml_update_state_7)[name = string("attention_4_updated_value_cache_0")]; write_state(data = attention_4_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_57_write_state")]; tensor coreml_update_state_9 = read_state(input = value_cache_state)[name = string("coreml_update_state_57")]; tensor attention_4_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_4_slice_current_layer_value_cache_begin_0"), val = tensor([4, 0, 0, 0])]; tensor attention_4_slice_current_layer_value_cache_end_0 = const()[name = string("attention_4_slice_current_layer_value_cache_end_0"), val = tensor([5, 2, 512, 64])]; tensor attention_4_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_4_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor 
attention_4_slice_current_layer_value_cache = slice_by_index(begin = attention_4_slice_current_layer_value_cache_begin_0, end = attention_4_slice_current_layer_value_cache_end_0, squeeze_mask = attention_4_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_9)[name = string("attention_4_slice_current_layer_value_cache")]; int32 attention_4_slice_value_cache_heads_axis_0 = const()[name = string("attention_4_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_4_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_4_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_4_slice_value_cache_heads_0, tensor attention_4_slice_value_cache_heads_1 = split(axis = attention_4_slice_value_cache_heads_axis_0, num_splits = attention_4_slice_value_cache_heads_num_splits_0, x = attention_4_slice_current_layer_value_cache)[name = string("attention_4_slice_value_cache_heads")]; bool attention_4_scores_0_transpose_y_0 = const()[name = string("attention_4_scores_0_transpose_y_0"), val = bool(true)]; bool attention_4_scores_0_transpose_x_0 = const()[name = string("attention_4_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_4_scores_0 = matmul(transpose_x = attention_4_scores_0_transpose_x_0, transpose_y = attention_4_scores_0_transpose_y_0, x = attention_4_key_cache_head_0, y = attention_4_q_splits_0)[name = string("attention_4_scores_0")]; fp16 attention_4_scaled_scores_0_y_0 = const()[name = string("attention_4_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_4_scaled_scores_0 = mul(x = attention_4_scores_0, y = attention_4_scaled_scores_0_y_0)[name = string("attention_4_scaled_scores_0")]; tensor attention_4_masked_scaled_scores_0 = add(x = attention_4_scaled_scores_0, y = transpose_0)[name = string("attention_4_masked_scaled_scores_0")]; int32 softmax_8_axis_0 = const()[name = string("softmax_8_axis_0"), val = int32(-2)]; tensor softmax_8 = softmax(axis = softmax_8_axis_0, x = attention_4_masked_scaled_scores_0)[name = string("softmax_8")]; bool attention_4_attention_0_transpose_x_0 = const()[name = string("attention_4_attention_0_transpose_x_0"), val = bool(true)]; bool attention_4_attention_0_transpose_y_0 = const()[name = string("attention_4_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_4_attention_0 = matmul(transpose_x = attention_4_attention_0_transpose_x_0, transpose_y = attention_4_attention_0_transpose_y_0, x = softmax_8, y = attention_4_slice_value_cache_heads_0)[name = string("attention_4_attention_0")]; bool attention_4_scores_1_transpose_y_0 = const()[name = string("attention_4_scores_1_transpose_y_0"), val = bool(true)]; bool attention_4_scores_1_transpose_x_0 = const()[name = string("attention_4_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_4_scores_1 = matmul(transpose_x = attention_4_scores_1_transpose_x_0, transpose_y = attention_4_scores_1_transpose_y_0, x = attention_4_key_cache_head_1, y = attention_4_q_splits_1)[name = string("attention_4_scores_1")]; fp16 attention_4_scaled_scores_1_y_0 = const()[name = string("attention_4_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_4_scaled_scores_1 = mul(x = attention_4_scores_1, y = attention_4_scaled_scores_1_y_0)[name = string("attention_4_scaled_scores_1")]; tensor attention_4_masked_scaled_scores_1 = add(x = attention_4_scaled_scores_1, y = transpose_0)[name = string("attention_4_masked_scaled_scores_1")]; int32 softmax_9_axis_0 = const()[name = string("softmax_9_axis_0"), val = int32(-2)]; 
tensor softmax_9 = softmax(axis = softmax_9_axis_0, x = attention_4_masked_scaled_scores_1)[name = string("softmax_9")]; bool attention_4_attention_1_transpose_x_0 = const()[name = string("attention_4_attention_1_transpose_x_0"), val = bool(true)]; bool attention_4_attention_1_transpose_y_0 = const()[name = string("attention_4_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_4_attention_1 = matmul(transpose_x = attention_4_attention_1_transpose_x_0, transpose_y = attention_4_attention_1_transpose_y_0, x = softmax_9, y = attention_4_slice_value_cache_heads_1)[name = string("attention_4_attention_1")]; int32 attention_4_concat_attention_all_heads_axis_0 = const()[name = string("attention_4_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_4_concat_attention_all_heads_interleave_0 = const()[name = string("attention_4_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_4_concat_attention_all_heads = concat(axis = attention_4_concat_attention_all_heads_axis_0, interleave = attention_4_concat_attention_all_heads_interleave_0, values = (attention_4_attention_0, attention_4_attention_1))[name = string("attention_4_concat_attention_all_heads")]; tensor attention_4_channels_first_retransposed_perm_0 = const()[name = string("attention_4_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_4_reshaped_shape_0 = const()[name = string("attention_4_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_4_channels_first_retransposed = transpose(perm = attention_4_channels_first_retransposed_perm_0, x = attention_4_concat_attention_all_heads)[name = string("transpose_39")]; tensor attention_4_reshaped = reshape(shape = attention_4_reshaped_shape_0, x = attention_4_channels_first_retransposed)[name = string("attention_4_reshaped")]; tensor attention_4_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338427904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339030080))))[name = string("attention_4_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_16 = constexpr_blockwise_shift_scale(data = attention_4_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339058816))))[name = string("constexpr_blockwise_shift_scale_16")]; tensor attention_4_outproj_strides_0 = const()[name = string("attention_4_outproj_strides_0"), val = tensor([1])]; string attention_4_outproj_pad_type_0 = const()[name = string("attention_4_outproj_pad_type_0"), val = string("valid")]; tensor attention_4_outproj_pad_0 = const()[name = string("attention_4_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_4_outproj_dilations_0 = const()[name = string("attention_4_outproj_dilations_0"), val = tensor([1])]; int32 attention_4_outproj_groups_0 = const()[name = string("attention_4_outproj_groups_0"), val = int32(1)]; tensor attention_4_outproj = conv(dilations = attention_4_outproj_dilations_0, groups = attention_4_outproj_groups_0, pad = attention_4_outproj_pad_0, pad_type = attention_4_outproj_pad_type_0, strides = attention_4_outproj_strides_0, weight = constexpr_blockwise_shift_scale_16, x = attention_4_reshaped)[name = string("attention_4_outproj")]; tensor block_4_residual_1 = add(x = block_3_residual_2, y = attention_4_outproj)[name = string("block_4_residual_1")]; tensor block_4_ffn_rmsnorm_abs = abs(x = 
block_4_residual_1)[name = string("block_4_ffn_rmsnorm_abs")]; tensor block_4_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_4_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_4_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_4_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_4_ffn_rmsnorm_maxval = reduce_max(axes = block_4_ffn_rmsnorm_maxval_axes_0, keep_dims = block_4_ffn_rmsnorm_maxval_keep_dims_0, x = block_4_ffn_rmsnorm_abs)[name = string("block_4_ffn_rmsnorm_maxval")]; fp16 block_4_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_4_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_4_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_4_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_4_ffn_rmsnorm_maxval_clipped = clip(alpha = block_4_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_4_ffn_rmsnorm_maxval_clipped_beta_0, x = block_4_ffn_rmsnorm_maxval)[name = string("block_4_ffn_rmsnorm_maxval_clipped")]; tensor block_4_ffn_rmsnorm_scaled = real_div(x = block_4_residual_1, y = block_4_ffn_rmsnorm_maxval_clipped)[name = string("block_4_ffn_rmsnorm_scaled")]; tensor block_4_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_4_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_4_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_4_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_4_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_4_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_4_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_4_ffn_rmsnorm_scaled)[name = string("block_4_ffn_rmsnorm_squared_sum")]; fp16 block_4_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_4_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_4_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_4_ffn_rmsnorm_rsqrt_epsilon_0, x = block_4_ffn_rmsnorm_squared_sum)[name = string("block_4_ffn_rmsnorm_rsqrt")]; fp16 block_4_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_4_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_4_ffn_rmsnorm_dim_scaled = mul(x = block_4_ffn_rmsnorm_scaled, y = block_4_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_4_ffn_rmsnorm_dim_scaled")]; tensor block_4_ffn_rmsnorm_normalized = mul(x = block_4_ffn_rmsnorm_dim_scaled, y = block_4_ffn_rmsnorm_rsqrt)[name = string("block_4_ffn_rmsnorm_normalized")]; tensor block_4_ffn_rmsnorm_y_0 = const()[name = string("block_4_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339060672)))]; tensor block_4_ffn_rmsnorm = mul(x = block_4_ffn_rmsnorm_normalized, y = block_4_ffn_rmsnorm_y_0)[name = string("block_4_ffn_rmsnorm")]; tensor block_4_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(339062528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342331200))))[name = string("block_4_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_17 = constexpr_blockwise_shift_scale(data = block_4_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342486912))))[name = string("constexpr_blockwise_shift_scale_17")]; tensor block_4_ffn_inproj_strides_0 = const()[name = string("block_4_ffn_inproj_strides_0"), val = tensor([1])]; string 
block_4_ffn_inproj_pad_type_0 = const()[name = string("block_4_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_4_ffn_inproj_pad_0 = const()[name = string("block_4_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_4_ffn_inproj_dilations_0 = const()[name = string("block_4_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_4_ffn_inproj_groups_0 = const()[name = string("block_4_ffn_inproj_groups_0"), val = int32(1)]; tensor block_4_ffn_inproj = conv(dilations = block_4_ffn_inproj_dilations_0, groups = block_4_ffn_inproj_groups_0, pad = block_4_ffn_inproj_pad_0, pad_type = block_4_ffn_inproj_pad_type_0, strides = block_4_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_17, x = block_4_ffn_rmsnorm)[name = string("block_4_ffn_inproj")]; tensor block_4_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342496704))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345765376))))[name = string("block_4_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_18 = constexpr_blockwise_shift_scale(data = block_4_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345921088))))[name = string("constexpr_blockwise_shift_scale_18")]; tensor block_4_ffn_g_strides_0 = const()[name = string("block_4_ffn_g_strides_0"), val = tensor([1])]; string block_4_ffn_g_pad_type_0 = const()[name = string("block_4_ffn_g_pad_type_0"), val = string("valid")]; tensor block_4_ffn_g_pad_0 = const()[name = string("block_4_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_4_ffn_g_dilations_0 = const()[name = string("block_4_ffn_g_dilations_0"), val = tensor([1])]; int32 block_4_ffn_g_groups_0 = const()[name = string("block_4_ffn_g_groups_0"), val = int32(1)]; tensor block_4_ffn_g = conv(dilations = block_4_ffn_g_dilations_0, groups = block_4_ffn_g_groups_0, pad = block_4_ffn_g_pad_0, pad_type = block_4_ffn_g_pad_type_0, strides = block_4_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_18, x = block_4_ffn_rmsnorm)[name = string("block_4_ffn_g")]; tensor block_4_ffn_g_activation = silu(x = block_4_ffn_g)[name = string("block_4_ffn_g_activation")]; tensor block_4_ffn_x_gated = mul(x = block_4_ffn_inproj, y = block_4_ffn_g_activation)[name = string("block_4_ffn_x_gated")]; tensor block_4_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(345930880))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349199552))))[name = string("block_4_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_19 = constexpr_blockwise_shift_scale(data = block_4_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349228288))))[name = string("constexpr_blockwise_shift_scale_19")]; tensor block_4_ffn_outproj_strides_0 = const()[name = string("block_4_ffn_outproj_strides_0"), val = tensor([1])]; string block_4_ffn_outproj_pad_type_0 = const()[name = string("block_4_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_4_ffn_outproj_pad_0 = const()[name = string("block_4_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_4_ffn_outproj_dilations_0 = const()[name = string("block_4_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_4_ffn_outproj_groups_0 
= const()[name = string("block_4_ffn_outproj_groups_0"), val = int32(1)]; tensor block_4_ffn_outproj = conv(dilations = block_4_ffn_outproj_dilations_0, groups = block_4_ffn_outproj_groups_0, pad = block_4_ffn_outproj_pad_0, pad_type = block_4_ffn_outproj_pad_type_0, strides = block_4_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_19, x = block_4_ffn_x_gated)[name = string("block_4_ffn_outproj")]; tensor block_4_residual_2 = add(x = block_4_ffn_outproj, y = block_4_residual_1)[name = string("block_4_residual_2")]; tensor block_5_attention_rmsnorm_abs = abs(x = block_4_residual_2)[name = string("block_5_attention_rmsnorm_abs")]; tensor block_5_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_5_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_5_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_5_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_5_attention_rmsnorm_maxval = reduce_max(axes = block_5_attention_rmsnorm_maxval_axes_0, keep_dims = block_5_attention_rmsnorm_maxval_keep_dims_0, x = block_5_attention_rmsnorm_abs)[name = string("block_5_attention_rmsnorm_maxval")]; fp16 block_5_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_5_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_5_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_5_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_5_attention_rmsnorm_maxval_clipped = clip(alpha = block_5_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_5_attention_rmsnorm_maxval_clipped_beta_0, x = block_5_attention_rmsnorm_maxval)[name = string("block_5_attention_rmsnorm_maxval_clipped")]; tensor block_5_attention_rmsnorm_scaled = real_div(x = block_4_residual_2, y = block_5_attention_rmsnorm_maxval_clipped)[name = string("block_5_attention_rmsnorm_scaled")]; tensor block_5_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_5_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_5_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_5_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_5_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_5_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_5_attention_rmsnorm_squared_sum_keep_dims_0, x = block_5_attention_rmsnorm_scaled)[name = string("block_5_attention_rmsnorm_squared_sum")]; fp16 block_5_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_5_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_5_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_5_attention_rmsnorm_rsqrt_epsilon_0, x = block_5_attention_rmsnorm_squared_sum)[name = string("block_5_attention_rmsnorm_rsqrt")]; fp16 block_5_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_5_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_5_attention_rmsnorm_dim_scaled = mul(x = block_5_attention_rmsnorm_scaled, y = block_5_attention_rmsnorm_dim_scaled_y_0)[name = string("block_5_attention_rmsnorm_dim_scaled")]; tensor block_5_attention_rmsnorm_normalized = mul(x = block_5_attention_rmsnorm_dim_scaled, y = block_5_attention_rmsnorm_rsqrt)[name = string("block_5_attention_rmsnorm_normalized")]; tensor block_5_attention_rmsnorm_y_0 = const()[name = string("block_5_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349230144)))]; tensor 
block_5_attention_rmsnorm = mul(x = block_5_attention_rmsnorm_normalized, y = block_5_attention_rmsnorm_y_0)[name = string("block_5_attention_rmsnorm")]; tensor attention_5_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349232000))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350006208))))[name = string("attention_5_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_20 = constexpr_blockwise_shift_scale(data = attention_5_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350043136))))[name = string("constexpr_blockwise_shift_scale_20")]; tensor attention_5_qkvproj_bias_0 = const()[name = string("attention_5_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350045504)))]; tensor attention_5_qkvproj_strides_0 = const()[name = string("attention_5_qkvproj_strides_0"), val = tensor([1])]; string attention_5_qkvproj_pad_type_0 = const()[name = string("attention_5_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_5_qkvproj_pad_0 = const()[name = string("attention_5_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_5_qkvproj_dilations_0 = const()[name = string("attention_5_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_5_qkvproj_groups_0 = const()[name = string("attention_5_qkvproj_groups_0"), val = int32(1)]; tensor attention_5_qkvproj = conv(bias = attention_5_qkvproj_bias_0, dilations = attention_5_qkvproj_dilations_0, groups = attention_5_qkvproj_groups_0, pad = attention_5_qkvproj_pad_0, pad_type = attention_5_qkvproj_pad_type_0, strides = attention_5_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_20, x = block_5_attention_rmsnorm)[name = string("attention_5_qkvproj")]; tensor attention_5_head_reshape_shape_0 = const()[name = string("attention_5_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_5_head_reshape = reshape(shape = attention_5_head_reshape_shape_0, x = attention_5_qkvproj)[name = string("attention_5_head_reshape")]; tensor attention_5_head_transpose_perm_0 = const()[name = string("attention_5_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_5_split_qkv_heads_axis_0 = const()[name = string("attention_5_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_5_split_qkv_heads_split_sizes_0 = const()[name = string("attention_5_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_5_head_transpose = transpose(perm = attention_5_head_transpose_perm_0, x = attention_5_head_reshape)[name = string("transpose_38")]; tensor attention_5_split_qkv_heads_0, tensor attention_5_split_qkv_heads_1, tensor attention_5_split_qkv_heads_2 = split(axis = attention_5_split_qkv_heads_axis_0, split_sizes = attention_5_split_qkv_heads_split_sizes_0, x = attention_5_head_transpose)[name = string("attention_5_split_qkv_heads")]; tensor attention_5_q_rope_lhs_mult = mul(x = attention_5_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_5_q_rope_lhs_mult")]; int32 attention_5_q_rotate_half_split_num_splits_0 = const()[name = string("attention_5_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_5_q_rotate_half_split_axis_0 = const()[name = string("attention_5_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_5_q_rotate_half_split_0, tensor 
attention_5_q_rotate_half_split_1 = split(axis = attention_5_q_rotate_half_split_axis_0, num_splits = attention_5_q_rotate_half_split_num_splits_0, x = attention_5_split_qkv_heads_0)[name = string("attention_5_q_rotate_half_split")]; fp16 attention_5_q_rotate_half_neg_y_0 = const()[name = string("attention_5_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_5_q_rotate_half_neg = mul(x = attention_5_q_rotate_half_split_1, y = attention_5_q_rotate_half_neg_y_0)[name = string("attention_5_q_rotate_half_neg")]; int32 attention_5_q_rotate_half_concat_axis_0 = const()[name = string("attention_5_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_5_q_rotate_half_concat_interleave_0 = const()[name = string("attention_5_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_5_q_rotate_half_concat = concat(axis = attention_5_q_rotate_half_concat_axis_0, interleave = attention_5_q_rotate_half_concat_interleave_0, values = (attention_5_q_rotate_half_neg, attention_5_q_rotate_half_split_0))[name = string("attention_5_q_rotate_half_concat")]; tensor attention_5_q_rope_rhs_mult = mul(x = attention_5_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_5_q_rope_rhs_mult")]; tensor attention_5_q_rope = add(x = attention_5_q_rope_lhs_mult, y = attention_5_q_rope_rhs_mult)[name = string("attention_5_q_rope")]; tensor attention_5_k_rope_lhs_mult = mul(x = attention_5_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_5_k_rope_lhs_mult")]; int32 attention_5_k_rotate_half_split_num_splits_0 = const()[name = string("attention_5_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_5_k_rotate_half_split_axis_0 = const()[name = string("attention_5_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_5_k_rotate_half_split_0, tensor attention_5_k_rotate_half_split_1 = split(axis = attention_5_k_rotate_half_split_axis_0, num_splits = attention_5_k_rotate_half_split_num_splits_0, x = attention_5_split_qkv_heads_1)[name = string("attention_5_k_rotate_half_split")]; fp16 attention_5_k_rotate_half_neg_y_0 = const()[name = string("attention_5_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_5_k_rotate_half_neg = mul(x = attention_5_k_rotate_half_split_1, y = attention_5_k_rotate_half_neg_y_0)[name = string("attention_5_k_rotate_half_neg")]; int32 attention_5_k_rotate_half_concat_axis_0 = const()[name = string("attention_5_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_5_k_rotate_half_concat_interleave_0 = const()[name = string("attention_5_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_5_k_rotate_half_concat = concat(axis = attention_5_k_rotate_half_concat_axis_0, interleave = attention_5_k_rotate_half_concat_interleave_0, values = (attention_5_k_rotate_half_neg, attention_5_k_rotate_half_split_0))[name = string("attention_5_k_rotate_half_concat")]; tensor attention_5_k_rope_rhs_mult = mul(x = attention_5_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_5_k_rope_rhs_mult")]; tensor attention_5_k_rope = add(x = attention_5_k_rope_lhs_mult, y = attention_5_k_rope_rhs_mult)[name = string("attention_5_k_rope")]; int32 attention_5_q_splits_axis_0 = const()[name = string("attention_5_q_splits_axis_0"), val = int32(1)]; int32 attention_5_q_splits_num_splits_0 = const()[name = string("attention_5_q_splits_num_splits_0"), val = int32(2)]; tensor attention_5_q_splits_0, tensor attention_5_q_splits_1 = split(axis = attention_5_q_splits_axis_0, num_splits = 
attention_5_q_splits_num_splits_0, x = attention_5_q_rope)[name = string("attention_5_q_splits")]; tensor attention_5_update_begin_0_values0_0 = const()[name = string("attention_5_update_begin_0_values0_0"), val = tensor([5])]; tensor attention_5_update_begin_0_values1_0 = const()[name = string("attention_5_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_5_update_begin_0_values3_0 = const()[name = string("attention_5_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_5_update_begin_0_axis_0 = const()[name = string("attention_5_update_begin_0_axis_0"), val = int32(0)]; bool attention_5_update_begin_0_interleave_0 = const()[name = string("attention_5_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_5_update_begin_0 = concat(axis = attention_5_update_begin_0_axis_0, interleave = attention_5_update_begin_0_interleave_0, values = (attention_5_update_begin_0_values0_0, attention_5_update_begin_0_values1_0, query_pos1, attention_5_update_begin_0_values3_0))[name = string("attention_5_update_begin_0")]; tensor attention_5_update_end_0_values0_0 = const()[name = string("attention_5_update_end_0_values0_0"), val = tensor([6])]; tensor attention_5_update_end_0_values1_0 = const()[name = string("attention_5_update_end_0_values1_0"), val = tensor([2])]; tensor attention_5_update_end_0_values3_0 = const()[name = string("attention_5_update_end_0_values3_0"), val = tensor([64])]; int32 attention_5_update_end_0_axis_0 = const()[name = string("attention_5_update_end_0_axis_0"), val = int32(0)]; bool attention_5_update_end_0_interleave_0 = const()[name = string("attention_5_update_end_0_interleave_0"), val = bool(false)]; tensor attention_5_update_end_0 = concat(axis = attention_5_update_end_0_axis_0, interleave = attention_5_update_end_0_interleave_0, values = (attention_5_update_end_0_values0_0, attention_5_update_end_0_values1_0, end_pos_0, attention_5_update_end_0_values3_0))[name = string("attention_5_update_end_0")]; tensor attention_5_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_5_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_5_updated_key_cache_0 = slice_update(begin = attention_5_update_begin_0, end = attention_5_update_end_0, squeeze_mask = attention_5_updated_key_cache_0_squeeze_mask_0, update = attention_5_k_rope, x = coreml_update_state_8)[name = string("attention_5_updated_key_cache_0")]; write_state(data = attention_5_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_58_write_state")]; tensor coreml_update_state_10 = read_state(input = key_cache_state)[name = string("coreml_update_state_58")]; tensor attention_5_key_cache_begin_0 = const()[name = string("attention_5_key_cache_begin_0"), val = tensor([5, 0, 0, 0])]; tensor attention_5_key_cache_end_0 = const()[name = string("attention_5_key_cache_end_0"), val = tensor([6, 2, 512, 64])]; tensor attention_5_key_cache_squeeze_mask_0 = const()[name = string("attention_5_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_5_key_cache = slice_by_index(begin = attention_5_key_cache_begin_0, end = attention_5_key_cache_end_0, squeeze_mask = attention_5_key_cache_squeeze_mask_0, x = coreml_update_state_10)[name = string("attention_5_key_cache")]; int32 attention_5_key_cache_head_axis_0 = const()[name = string("attention_5_key_cache_head_axis_0"), val = int32(1)]; int32 attention_5_key_cache_head_num_splits_0 = const()[name = 
string("attention_5_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_5_key_cache_head_0, tensor attention_5_key_cache_head_1 = split(axis = attention_5_key_cache_head_axis_0, num_splits = attention_5_key_cache_head_num_splits_0, x = attention_5_key_cache)[name = string("attention_5_key_cache_head")]; tensor attention_5_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_5_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_5_updated_value_cache_0 = slice_update(begin = attention_5_update_begin_0, end = attention_5_update_end_0, squeeze_mask = attention_5_updated_value_cache_0_squeeze_mask_0, update = attention_5_split_qkv_heads_2, x = coreml_update_state_9)[name = string("attention_5_updated_value_cache_0")]; write_state(data = attention_5_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_59_write_state")]; tensor coreml_update_state_11 = read_state(input = value_cache_state)[name = string("coreml_update_state_59")]; tensor attention_5_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_5_slice_current_layer_value_cache_begin_0"), val = tensor([5, 0, 0, 0])]; tensor attention_5_slice_current_layer_value_cache_end_0 = const()[name = string("attention_5_slice_current_layer_value_cache_end_0"), val = tensor([6, 2, 512, 64])]; tensor attention_5_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_5_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_5_slice_current_layer_value_cache = slice_by_index(begin = attention_5_slice_current_layer_value_cache_begin_0, end = attention_5_slice_current_layer_value_cache_end_0, squeeze_mask = attention_5_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_11)[name = string("attention_5_slice_current_layer_value_cache")]; int32 attention_5_slice_value_cache_heads_axis_0 = const()[name = string("attention_5_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_5_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_5_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_5_slice_value_cache_heads_0, tensor attention_5_slice_value_cache_heads_1 = split(axis = attention_5_slice_value_cache_heads_axis_0, num_splits = attention_5_slice_value_cache_heads_num_splits_0, x = attention_5_slice_current_layer_value_cache)[name = string("attention_5_slice_value_cache_heads")]; bool attention_5_scores_0_transpose_y_0 = const()[name = string("attention_5_scores_0_transpose_y_0"), val = bool(true)]; bool attention_5_scores_0_transpose_x_0 = const()[name = string("attention_5_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_5_scores_0 = matmul(transpose_x = attention_5_scores_0_transpose_x_0, transpose_y = attention_5_scores_0_transpose_y_0, x = attention_5_key_cache_head_0, y = attention_5_q_splits_0)[name = string("attention_5_scores_0")]; fp16 attention_5_scaled_scores_0_y_0 = const()[name = string("attention_5_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_5_scaled_scores_0 = mul(x = attention_5_scores_0, y = attention_5_scaled_scores_0_y_0)[name = string("attention_5_scaled_scores_0")]; tensor attention_5_masked_scaled_scores_0 = add(x = attention_5_scaled_scores_0, y = transpose_0)[name = string("attention_5_masked_scaled_scores_0")]; int32 softmax_10_axis_0 = const()[name = string("softmax_10_axis_0"), val = int32(-2)]; tensor softmax_10 = 
softmax(axis = softmax_10_axis_0, x = attention_5_masked_scaled_scores_0)[name = string("softmax_10")]; bool attention_5_attention_0_transpose_x_0 = const()[name = string("attention_5_attention_0_transpose_x_0"), val = bool(true)]; bool attention_5_attention_0_transpose_y_0 = const()[name = string("attention_5_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_5_attention_0 = matmul(transpose_x = attention_5_attention_0_transpose_x_0, transpose_y = attention_5_attention_0_transpose_y_0, x = softmax_10, y = attention_5_slice_value_cache_heads_0)[name = string("attention_5_attention_0")]; bool attention_5_scores_1_transpose_y_0 = const()[name = string("attention_5_scores_1_transpose_y_0"), val = bool(true)]; bool attention_5_scores_1_transpose_x_0 = const()[name = string("attention_5_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_5_scores_1 = matmul(transpose_x = attention_5_scores_1_transpose_x_0, transpose_y = attention_5_scores_1_transpose_y_0, x = attention_5_key_cache_head_1, y = attention_5_q_splits_1)[name = string("attention_5_scores_1")]; fp16 attention_5_scaled_scores_1_y_0 = const()[name = string("attention_5_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_5_scaled_scores_1 = mul(x = attention_5_scores_1, y = attention_5_scaled_scores_1_y_0)[name = string("attention_5_scaled_scores_1")]; tensor attention_5_masked_scaled_scores_1 = add(x = attention_5_scaled_scores_1, y = transpose_0)[name = string("attention_5_masked_scaled_scores_1")]; int32 softmax_11_axis_0 = const()[name = string("softmax_11_axis_0"), val = int32(-2)]; tensor softmax_11 = softmax(axis = softmax_11_axis_0, x = attention_5_masked_scaled_scores_1)[name = string("softmax_11")]; bool attention_5_attention_1_transpose_x_0 = const()[name = string("attention_5_attention_1_transpose_x_0"), val = bool(true)]; bool attention_5_attention_1_transpose_y_0 = const()[name = string("attention_5_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_5_attention_1 = matmul(transpose_x = attention_5_attention_1_transpose_x_0, transpose_y = attention_5_attention_1_transpose_y_0, x = softmax_11, y = attention_5_slice_value_cache_heads_1)[name = string("attention_5_attention_1")]; int32 attention_5_concat_attention_all_heads_axis_0 = const()[name = string("attention_5_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_5_concat_attention_all_heads_interleave_0 = const()[name = string("attention_5_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_5_concat_attention_all_heads = concat(axis = attention_5_concat_attention_all_heads_axis_0, interleave = attention_5_concat_attention_all_heads_interleave_0, values = (attention_5_attention_0, attention_5_attention_1))[name = string("attention_5_concat_attention_all_heads")]; tensor attention_5_channels_first_retransposed_perm_0 = const()[name = string("attention_5_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_5_reshaped_shape_0 = const()[name = string("attention_5_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_5_channels_first_retransposed = transpose(perm = attention_5_channels_first_retransposed_perm_0, x = attention_5_concat_attention_all_heads)[name = string("transpose_37")]; tensor attention_5_reshaped = reshape(shape = attention_5_reshaped_shape_0, x = attention_5_channels_first_retransposed)[name = string("attention_5_reshaped")]; tensor attention_5_outproj_weight_dequantization = constexpr_lut_to_dense(indices = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350047872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350650048))))[name = string("attention_5_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_21 = constexpr_blockwise_shift_scale(data = attention_5_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350678784))))[name = string("constexpr_blockwise_shift_scale_21")]; tensor attention_5_outproj_strides_0 = const()[name = string("attention_5_outproj_strides_0"), val = tensor([1])]; string attention_5_outproj_pad_type_0 = const()[name = string("attention_5_outproj_pad_type_0"), val = string("valid")]; tensor attention_5_outproj_pad_0 = const()[name = string("attention_5_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_5_outproj_dilations_0 = const()[name = string("attention_5_outproj_dilations_0"), val = tensor([1])]; int32 attention_5_outproj_groups_0 = const()[name = string("attention_5_outproj_groups_0"), val = int32(1)]; tensor attention_5_outproj = conv(dilations = attention_5_outproj_dilations_0, groups = attention_5_outproj_groups_0, pad = attention_5_outproj_pad_0, pad_type = attention_5_outproj_pad_type_0, strides = attention_5_outproj_strides_0, weight = constexpr_blockwise_shift_scale_21, x = attention_5_reshaped)[name = string("attention_5_outproj")]; tensor block_5_residual_1 = add(x = block_4_residual_2, y = attention_5_outproj)[name = string("block_5_residual_1")]; tensor block_5_ffn_rmsnorm_abs = abs(x = block_5_residual_1)[name = string("block_5_ffn_rmsnorm_abs")]; tensor block_5_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_5_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_5_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_5_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_5_ffn_rmsnorm_maxval = reduce_max(axes = block_5_ffn_rmsnorm_maxval_axes_0, keep_dims = block_5_ffn_rmsnorm_maxval_keep_dims_0, x = block_5_ffn_rmsnorm_abs)[name = string("block_5_ffn_rmsnorm_maxval")]; fp16 block_5_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_5_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_5_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_5_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_5_ffn_rmsnorm_maxval_clipped = clip(alpha = block_5_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_5_ffn_rmsnorm_maxval_clipped_beta_0, x = block_5_ffn_rmsnorm_maxval)[name = string("block_5_ffn_rmsnorm_maxval_clipped")]; tensor block_5_ffn_rmsnorm_scaled = real_div(x = block_5_residual_1, y = block_5_ffn_rmsnorm_maxval_clipped)[name = string("block_5_ffn_rmsnorm_scaled")]; tensor block_5_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_5_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_5_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_5_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_5_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_5_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_5_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_5_ffn_rmsnorm_scaled)[name = string("block_5_ffn_rmsnorm_squared_sum")]; fp16 block_5_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_5_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_5_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_5_ffn_rmsnorm_rsqrt_epsilon_0, x 
= block_5_ffn_rmsnorm_squared_sum)[name = string("block_5_ffn_rmsnorm_rsqrt")]; fp16 block_5_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_5_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_5_ffn_rmsnorm_dim_scaled = mul(x = block_5_ffn_rmsnorm_scaled, y = block_5_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_5_ffn_rmsnorm_dim_scaled")]; tensor block_5_ffn_rmsnorm_normalized = mul(x = block_5_ffn_rmsnorm_dim_scaled, y = block_5_ffn_rmsnorm_rsqrt)[name = string("block_5_ffn_rmsnorm_normalized")]; tensor block_5_ffn_rmsnorm_y_0 = const()[name = string("block_5_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350680640)))]; tensor block_5_ffn_rmsnorm = mul(x = block_5_ffn_rmsnorm_normalized, y = block_5_ffn_rmsnorm_y_0)[name = string("block_5_ffn_rmsnorm")]; tensor block_5_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350682496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353951168))))[name = string("block_5_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_22 = constexpr_blockwise_shift_scale(data = block_5_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354106880))))[name = string("constexpr_blockwise_shift_scale_22")]; tensor block_5_ffn_inproj_strides_0 = const()[name = string("block_5_ffn_inproj_strides_0"), val = tensor([1])]; string block_5_ffn_inproj_pad_type_0 = const()[name = string("block_5_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_5_ffn_inproj_pad_0 = const()[name = string("block_5_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_5_ffn_inproj_dilations_0 = const()[name = string("block_5_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_5_ffn_inproj_groups_0 = const()[name = string("block_5_ffn_inproj_groups_0"), val = int32(1)]; tensor block_5_ffn_inproj = conv(dilations = block_5_ffn_inproj_dilations_0, groups = block_5_ffn_inproj_groups_0, pad = block_5_ffn_inproj_pad_0, pad_type = block_5_ffn_inproj_pad_type_0, strides = block_5_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_22, x = block_5_ffn_rmsnorm)[name = string("block_5_ffn_inproj")]; tensor block_5_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354116672))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357385344))))[name = string("block_5_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_23 = constexpr_blockwise_shift_scale(data = block_5_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357541056))))[name = string("constexpr_blockwise_shift_scale_23")]; tensor block_5_ffn_g_strides_0 = const()[name = string("block_5_ffn_g_strides_0"), val = tensor([1])]; string block_5_ffn_g_pad_type_0 = const()[name = string("block_5_ffn_g_pad_type_0"), val = string("valid")]; tensor block_5_ffn_g_pad_0 = const()[name = string("block_5_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_5_ffn_g_dilations_0 = const()[name = string("block_5_ffn_g_dilations_0"), val = tensor([1])]; int32 block_5_ffn_g_groups_0 = const()[name = string("block_5_ffn_g_groups_0"), val = int32(1)]; tensor block_5_ffn_g = conv(dilations = 
block_5_ffn_g_dilations_0, groups = block_5_ffn_g_groups_0, pad = block_5_ffn_g_pad_0, pad_type = block_5_ffn_g_pad_type_0, strides = block_5_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_23, x = block_5_ffn_rmsnorm)[name = string("block_5_ffn_g")]; tensor block_5_ffn_g_activation = silu(x = block_5_ffn_g)[name = string("block_5_ffn_g_activation")]; tensor block_5_ffn_x_gated = mul(x = block_5_ffn_inproj, y = block_5_ffn_g_activation)[name = string("block_5_ffn_x_gated")]; tensor block_5_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(357550848))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360819520))))[name = string("block_5_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_24 = constexpr_blockwise_shift_scale(data = block_5_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360848256))))[name = string("constexpr_blockwise_shift_scale_24")]; tensor block_5_ffn_outproj_strides_0 = const()[name = string("block_5_ffn_outproj_strides_0"), val = tensor([1])]; string block_5_ffn_outproj_pad_type_0 = const()[name = string("block_5_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_5_ffn_outproj_pad_0 = const()[name = string("block_5_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_5_ffn_outproj_dilations_0 = const()[name = string("block_5_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_5_ffn_outproj_groups_0 = const()[name = string("block_5_ffn_outproj_groups_0"), val = int32(1)]; tensor block_5_ffn_outproj = conv(dilations = block_5_ffn_outproj_dilations_0, groups = block_5_ffn_outproj_groups_0, pad = block_5_ffn_outproj_pad_0, pad_type = block_5_ffn_outproj_pad_type_0, strides = block_5_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_24, x = block_5_ffn_x_gated)[name = string("block_5_ffn_outproj")]; tensor block_5_residual_2 = add(x = block_5_ffn_outproj, y = block_5_residual_1)[name = string("block_5_residual_2")]; tensor block_6_attention_rmsnorm_abs = abs(x = block_5_residual_2)[name = string("block_6_attention_rmsnorm_abs")]; tensor block_6_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_6_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_6_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_6_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_6_attention_rmsnorm_maxval = reduce_max(axes = block_6_attention_rmsnorm_maxval_axes_0, keep_dims = block_6_attention_rmsnorm_maxval_keep_dims_0, x = block_6_attention_rmsnorm_abs)[name = string("block_6_attention_rmsnorm_maxval")]; fp16 block_6_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_6_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_6_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_6_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_6_attention_rmsnorm_maxval_clipped = clip(alpha = block_6_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_6_attention_rmsnorm_maxval_clipped_beta_0, x = block_6_attention_rmsnorm_maxval)[name = string("block_6_attention_rmsnorm_maxval_clipped")]; tensor block_6_attention_rmsnorm_scaled = real_div(x = block_5_residual_2, y = block_6_attention_rmsnorm_maxval_clipped)[name = string("block_6_attention_rmsnorm_scaled")]; tensor 
block_6_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_6_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_6_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_6_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_6_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_6_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_6_attention_rmsnorm_squared_sum_keep_dims_0, x = block_6_attention_rmsnorm_scaled)[name = string("block_6_attention_rmsnorm_squared_sum")]; fp16 block_6_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_6_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_6_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_6_attention_rmsnorm_rsqrt_epsilon_0, x = block_6_attention_rmsnorm_squared_sum)[name = string("block_6_attention_rmsnorm_rsqrt")]; fp16 block_6_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_6_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_6_attention_rmsnorm_dim_scaled = mul(x = block_6_attention_rmsnorm_scaled, y = block_6_attention_rmsnorm_dim_scaled_y_0)[name = string("block_6_attention_rmsnorm_dim_scaled")]; tensor block_6_attention_rmsnorm_normalized = mul(x = block_6_attention_rmsnorm_dim_scaled, y = block_6_attention_rmsnorm_rsqrt)[name = string("block_6_attention_rmsnorm_normalized")]; tensor block_6_attention_rmsnorm_y_0 = const()[name = string("block_6_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360850112)))]; tensor block_6_attention_rmsnorm = mul(x = block_6_attention_rmsnorm_normalized, y = block_6_attention_rmsnorm_y_0)[name = string("block_6_attention_rmsnorm")]; tensor attention_6_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360851968))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361626176))))[name = string("attention_6_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_25 = constexpr_blockwise_shift_scale(data = attention_6_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361663104))))[name = string("constexpr_blockwise_shift_scale_25")]; tensor attention_6_qkvproj_bias_0 = const()[name = string("attention_6_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361665472)))]; tensor attention_6_qkvproj_strides_0 = const()[name = string("attention_6_qkvproj_strides_0"), val = tensor([1])]; string attention_6_qkvproj_pad_type_0 = const()[name = string("attention_6_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_6_qkvproj_pad_0 = const()[name = string("attention_6_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_6_qkvproj_dilations_0 = const()[name = string("attention_6_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_6_qkvproj_groups_0 = const()[name = string("attention_6_qkvproj_groups_0"), val = int32(1)]; tensor attention_6_qkvproj = conv(bias = attention_6_qkvproj_bias_0, dilations = attention_6_qkvproj_dilations_0, groups = attention_6_qkvproj_groups_0, pad = attention_6_qkvproj_pad_0, pad_type = attention_6_qkvproj_pad_type_0, strides = attention_6_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_25, x = block_6_attention_rmsnorm)[name = string("attention_6_qkvproj")]; tensor 
attention_6_head_reshape_shape_0 = const()[name = string("attention_6_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_6_head_reshape = reshape(shape = attention_6_head_reshape_shape_0, x = attention_6_qkvproj)[name = string("attention_6_head_reshape")]; tensor attention_6_head_transpose_perm_0 = const()[name = string("attention_6_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_6_split_qkv_heads_axis_0 = const()[name = string("attention_6_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_6_split_qkv_heads_split_sizes_0 = const()[name = string("attention_6_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_6_head_transpose = transpose(perm = attention_6_head_transpose_perm_0, x = attention_6_head_reshape)[name = string("transpose_36")]; tensor attention_6_split_qkv_heads_0, tensor attention_6_split_qkv_heads_1, tensor attention_6_split_qkv_heads_2 = split(axis = attention_6_split_qkv_heads_axis_0, split_sizes = attention_6_split_qkv_heads_split_sizes_0, x = attention_6_head_transpose)[name = string("attention_6_split_qkv_heads")]; tensor attention_6_q_rope_lhs_mult = mul(x = attention_6_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_6_q_rope_lhs_mult")]; int32 attention_6_q_rotate_half_split_num_splits_0 = const()[name = string("attention_6_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_6_q_rotate_half_split_axis_0 = const()[name = string("attention_6_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_6_q_rotate_half_split_0, tensor attention_6_q_rotate_half_split_1 = split(axis = attention_6_q_rotate_half_split_axis_0, num_splits = attention_6_q_rotate_half_split_num_splits_0, x = attention_6_split_qkv_heads_0)[name = string("attention_6_q_rotate_half_split")]; fp16 attention_6_q_rotate_half_neg_y_0 = const()[name = string("attention_6_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_6_q_rotate_half_neg = mul(x = attention_6_q_rotate_half_split_1, y = attention_6_q_rotate_half_neg_y_0)[name = string("attention_6_q_rotate_half_neg")]; int32 attention_6_q_rotate_half_concat_axis_0 = const()[name = string("attention_6_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_6_q_rotate_half_concat_interleave_0 = const()[name = string("attention_6_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_6_q_rotate_half_concat = concat(axis = attention_6_q_rotate_half_concat_axis_0, interleave = attention_6_q_rotate_half_concat_interleave_0, values = (attention_6_q_rotate_half_neg, attention_6_q_rotate_half_split_0))[name = string("attention_6_q_rotate_half_concat")]; tensor attention_6_q_rope_rhs_mult = mul(x = attention_6_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_6_q_rope_rhs_mult")]; tensor attention_6_q_rope = add(x = attention_6_q_rope_lhs_mult, y = attention_6_q_rope_rhs_mult)[name = string("attention_6_q_rope")]; tensor attention_6_k_rope_lhs_mult = mul(x = attention_6_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_6_k_rope_lhs_mult")]; int32 attention_6_k_rotate_half_split_num_splits_0 = const()[name = string("attention_6_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_6_k_rotate_half_split_axis_0 = const()[name = string("attention_6_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_6_k_rotate_half_split_0, tensor attention_6_k_rotate_half_split_1 = split(axis = attention_6_k_rotate_half_split_axis_0, num_splits = 
attention_6_k_rotate_half_split_num_splits_0, x = attention_6_split_qkv_heads_1)[name = string("attention_6_k_rotate_half_split")]; fp16 attention_6_k_rotate_half_neg_y_0 = const()[name = string("attention_6_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_6_k_rotate_half_neg = mul(x = attention_6_k_rotate_half_split_1, y = attention_6_k_rotate_half_neg_y_0)[name = string("attention_6_k_rotate_half_neg")]; int32 attention_6_k_rotate_half_concat_axis_0 = const()[name = string("attention_6_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_6_k_rotate_half_concat_interleave_0 = const()[name = string("attention_6_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_6_k_rotate_half_concat = concat(axis = attention_6_k_rotate_half_concat_axis_0, interleave = attention_6_k_rotate_half_concat_interleave_0, values = (attention_6_k_rotate_half_neg, attention_6_k_rotate_half_split_0))[name = string("attention_6_k_rotate_half_concat")]; tensor attention_6_k_rope_rhs_mult = mul(x = attention_6_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_6_k_rope_rhs_mult")]; tensor attention_6_k_rope = add(x = attention_6_k_rope_lhs_mult, y = attention_6_k_rope_rhs_mult)[name = string("attention_6_k_rope")]; int32 attention_6_q_splits_axis_0 = const()[name = string("attention_6_q_splits_axis_0"), val = int32(1)]; int32 attention_6_q_splits_num_splits_0 = const()[name = string("attention_6_q_splits_num_splits_0"), val = int32(2)]; tensor attention_6_q_splits_0, tensor attention_6_q_splits_1 = split(axis = attention_6_q_splits_axis_0, num_splits = attention_6_q_splits_num_splits_0, x = attention_6_q_rope)[name = string("attention_6_q_splits")]; tensor attention_6_update_begin_0_values0_0 = const()[name = string("attention_6_update_begin_0_values0_0"), val = tensor([6])]; tensor attention_6_update_begin_0_values1_0 = const()[name = string("attention_6_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_6_update_begin_0_values3_0 = const()[name = string("attention_6_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_6_update_begin_0_axis_0 = const()[name = string("attention_6_update_begin_0_axis_0"), val = int32(0)]; bool attention_6_update_begin_0_interleave_0 = const()[name = string("attention_6_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_6_update_begin_0 = concat(axis = attention_6_update_begin_0_axis_0, interleave = attention_6_update_begin_0_interleave_0, values = (attention_6_update_begin_0_values0_0, attention_6_update_begin_0_values1_0, query_pos1, attention_6_update_begin_0_values3_0))[name = string("attention_6_update_begin_0")]; tensor attention_6_update_end_0_values0_0 = const()[name = string("attention_6_update_end_0_values0_0"), val = tensor([7])]; tensor attention_6_update_end_0_values1_0 = const()[name = string("attention_6_update_end_0_values1_0"), val = tensor([2])]; tensor attention_6_update_end_0_values3_0 = const()[name = string("attention_6_update_end_0_values3_0"), val = tensor([64])]; int32 attention_6_update_end_0_axis_0 = const()[name = string("attention_6_update_end_0_axis_0"), val = int32(0)]; bool attention_6_update_end_0_interleave_0 = const()[name = string("attention_6_update_end_0_interleave_0"), val = bool(false)]; tensor attention_6_update_end_0 = concat(axis = attention_6_update_end_0_axis_0, interleave = attention_6_update_end_0_interleave_0, values = (attention_6_update_end_0_values0_0, attention_6_update_end_0_values1_0, end_pos_0, 
attention_6_update_end_0_values3_0))[name = string("attention_6_update_end_0")]; tensor attention_6_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_6_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_6_updated_key_cache_0 = slice_update(begin = attention_6_update_begin_0, end = attention_6_update_end_0, squeeze_mask = attention_6_updated_key_cache_0_squeeze_mask_0, update = attention_6_k_rope, x = coreml_update_state_10)[name = string("attention_6_updated_key_cache_0")]; write_state(data = attention_6_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_60_write_state")]; tensor coreml_update_state_12 = read_state(input = key_cache_state)[name = string("coreml_update_state_60")]; tensor attention_6_key_cache_begin_0 = const()[name = string("attention_6_key_cache_begin_0"), val = tensor([6, 0, 0, 0])]; tensor attention_6_key_cache_end_0 = const()[name = string("attention_6_key_cache_end_0"), val = tensor([7, 2, 512, 64])]; tensor attention_6_key_cache_squeeze_mask_0 = const()[name = string("attention_6_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_6_key_cache = slice_by_index(begin = attention_6_key_cache_begin_0, end = attention_6_key_cache_end_0, squeeze_mask = attention_6_key_cache_squeeze_mask_0, x = coreml_update_state_12)[name = string("attention_6_key_cache")]; int32 attention_6_key_cache_head_axis_0 = const()[name = string("attention_6_key_cache_head_axis_0"), val = int32(1)]; int32 attention_6_key_cache_head_num_splits_0 = const()[name = string("attention_6_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_6_key_cache_head_0, tensor attention_6_key_cache_head_1 = split(axis = attention_6_key_cache_head_axis_0, num_splits = attention_6_key_cache_head_num_splits_0, x = attention_6_key_cache)[name = string("attention_6_key_cache_head")]; tensor attention_6_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_6_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_6_updated_value_cache_0 = slice_update(begin = attention_6_update_begin_0, end = attention_6_update_end_0, squeeze_mask = attention_6_updated_value_cache_0_squeeze_mask_0, update = attention_6_split_qkv_heads_2, x = coreml_update_state_11)[name = string("attention_6_updated_value_cache_0")]; write_state(data = attention_6_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_61_write_state")]; tensor coreml_update_state_13 = read_state(input = value_cache_state)[name = string("coreml_update_state_61")]; tensor attention_6_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_6_slice_current_layer_value_cache_begin_0"), val = tensor([6, 0, 0, 0])]; tensor attention_6_slice_current_layer_value_cache_end_0 = const()[name = string("attention_6_slice_current_layer_value_cache_end_0"), val = tensor([7, 2, 512, 64])]; tensor attention_6_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_6_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_6_slice_current_layer_value_cache = slice_by_index(begin = attention_6_slice_current_layer_value_cache_begin_0, end = attention_6_slice_current_layer_value_cache_end_0, squeeze_mask = attention_6_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_13)[name = string("attention_6_slice_current_layer_value_cache")]; 
int32 attention_6_slice_value_cache_heads_axis_0 = const()[name = string("attention_6_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_6_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_6_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_6_slice_value_cache_heads_0, tensor attention_6_slice_value_cache_heads_1 = split(axis = attention_6_slice_value_cache_heads_axis_0, num_splits = attention_6_slice_value_cache_heads_num_splits_0, x = attention_6_slice_current_layer_value_cache)[name = string("attention_6_slice_value_cache_heads")]; bool attention_6_scores_0_transpose_y_0 = const()[name = string("attention_6_scores_0_transpose_y_0"), val = bool(true)]; bool attention_6_scores_0_transpose_x_0 = const()[name = string("attention_6_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_6_scores_0 = matmul(transpose_x = attention_6_scores_0_transpose_x_0, transpose_y = attention_6_scores_0_transpose_y_0, x = attention_6_key_cache_head_0, y = attention_6_q_splits_0)[name = string("attention_6_scores_0")]; fp16 attention_6_scaled_scores_0_y_0 = const()[name = string("attention_6_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_6_scaled_scores_0 = mul(x = attention_6_scores_0, y = attention_6_scaled_scores_0_y_0)[name = string("attention_6_scaled_scores_0")]; tensor attention_6_masked_scaled_scores_0 = add(x = attention_6_scaled_scores_0, y = transpose_0)[name = string("attention_6_masked_scaled_scores_0")]; int32 softmax_12_axis_0 = const()[name = string("softmax_12_axis_0"), val = int32(-2)]; tensor softmax_12 = softmax(axis = softmax_12_axis_0, x = attention_6_masked_scaled_scores_0)[name = string("softmax_12")]; bool attention_6_attention_0_transpose_x_0 = const()[name = string("attention_6_attention_0_transpose_x_0"), val = bool(true)]; bool attention_6_attention_0_transpose_y_0 = const()[name = string("attention_6_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_6_attention_0 = matmul(transpose_x = attention_6_attention_0_transpose_x_0, transpose_y = attention_6_attention_0_transpose_y_0, x = softmax_12, y = attention_6_slice_value_cache_heads_0)[name = string("attention_6_attention_0")]; bool attention_6_scores_1_transpose_y_0 = const()[name = string("attention_6_scores_1_transpose_y_0"), val = bool(true)]; bool attention_6_scores_1_transpose_x_0 = const()[name = string("attention_6_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_6_scores_1 = matmul(transpose_x = attention_6_scores_1_transpose_x_0, transpose_y = attention_6_scores_1_transpose_y_0, x = attention_6_key_cache_head_1, y = attention_6_q_splits_1)[name = string("attention_6_scores_1")]; fp16 attention_6_scaled_scores_1_y_0 = const()[name = string("attention_6_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_6_scaled_scores_1 = mul(x = attention_6_scores_1, y = attention_6_scaled_scores_1_y_0)[name = string("attention_6_scaled_scores_1")]; tensor attention_6_masked_scaled_scores_1 = add(x = attention_6_scaled_scores_1, y = transpose_0)[name = string("attention_6_masked_scaled_scores_1")]; int32 softmax_13_axis_0 = const()[name = string("softmax_13_axis_0"), val = int32(-2)]; tensor softmax_13 = softmax(axis = softmax_13_axis_0, x = attention_6_masked_scaled_scores_1)[name = string("softmax_13")]; bool attention_6_attention_1_transpose_x_0 = const()[name = string("attention_6_attention_1_transpose_x_0"), val = bool(true)]; bool attention_6_attention_1_transpose_y_0 = const()[name = 
string("attention_6_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_6_attention_1 = matmul(transpose_x = attention_6_attention_1_transpose_x_0, transpose_y = attention_6_attention_1_transpose_y_0, x = softmax_13, y = attention_6_slice_value_cache_heads_1)[name = string("attention_6_attention_1")]; int32 attention_6_concat_attention_all_heads_axis_0 = const()[name = string("attention_6_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_6_concat_attention_all_heads_interleave_0 = const()[name = string("attention_6_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_6_concat_attention_all_heads = concat(axis = attention_6_concat_attention_all_heads_axis_0, interleave = attention_6_concat_attention_all_heads_interleave_0, values = (attention_6_attention_0, attention_6_attention_1))[name = string("attention_6_concat_attention_all_heads")]; tensor attention_6_channels_first_retransposed_perm_0 = const()[name = string("attention_6_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_6_reshaped_shape_0 = const()[name = string("attention_6_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_6_channels_first_retransposed = transpose(perm = attention_6_channels_first_retransposed_perm_0, x = attention_6_concat_attention_all_heads)[name = string("transpose_35")]; tensor attention_6_reshaped = reshape(shape = attention_6_reshaped_shape_0, x = attention_6_channels_first_retransposed)[name = string("attention_6_reshaped")]; tensor attention_6_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361667840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362270016))))[name = string("attention_6_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_26 = constexpr_blockwise_shift_scale(data = attention_6_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362298752))))[name = string("constexpr_blockwise_shift_scale_26")]; tensor attention_6_outproj_strides_0 = const()[name = string("attention_6_outproj_strides_0"), val = tensor([1])]; string attention_6_outproj_pad_type_0 = const()[name = string("attention_6_outproj_pad_type_0"), val = string("valid")]; tensor attention_6_outproj_pad_0 = const()[name = string("attention_6_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_6_outproj_dilations_0 = const()[name = string("attention_6_outproj_dilations_0"), val = tensor([1])]; int32 attention_6_outproj_groups_0 = const()[name = string("attention_6_outproj_groups_0"), val = int32(1)]; tensor attention_6_outproj = conv(dilations = attention_6_outproj_dilations_0, groups = attention_6_outproj_groups_0, pad = attention_6_outproj_pad_0, pad_type = attention_6_outproj_pad_type_0, strides = attention_6_outproj_strides_0, weight = constexpr_blockwise_shift_scale_26, x = attention_6_reshaped)[name = string("attention_6_outproj")]; tensor block_6_residual_1 = add(x = block_5_residual_2, y = attention_6_outproj)[name = string("block_6_residual_1")]; tensor block_6_ffn_rmsnorm_abs = abs(x = block_6_residual_1)[name = string("block_6_ffn_rmsnorm_abs")]; tensor block_6_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_6_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_6_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_6_ffn_rmsnorm_maxval_keep_dims_0"), val = 
bool(true)]; tensor block_6_ffn_rmsnorm_maxval = reduce_max(axes = block_6_ffn_rmsnorm_maxval_axes_0, keep_dims = block_6_ffn_rmsnorm_maxval_keep_dims_0, x = block_6_ffn_rmsnorm_abs)[name = string("block_6_ffn_rmsnorm_maxval")]; fp16 block_6_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_6_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_6_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_6_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_6_ffn_rmsnorm_maxval_clipped = clip(alpha = block_6_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_6_ffn_rmsnorm_maxval_clipped_beta_0, x = block_6_ffn_rmsnorm_maxval)[name = string("block_6_ffn_rmsnorm_maxval_clipped")]; tensor block_6_ffn_rmsnorm_scaled = real_div(x = block_6_residual_1, y = block_6_ffn_rmsnorm_maxval_clipped)[name = string("block_6_ffn_rmsnorm_scaled")]; tensor block_6_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_6_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_6_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_6_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_6_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_6_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_6_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_6_ffn_rmsnorm_scaled)[name = string("block_6_ffn_rmsnorm_squared_sum")]; fp16 block_6_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_6_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_6_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_6_ffn_rmsnorm_rsqrt_epsilon_0, x = block_6_ffn_rmsnorm_squared_sum)[name = string("block_6_ffn_rmsnorm_rsqrt")]; fp16 block_6_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_6_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_6_ffn_rmsnorm_dim_scaled = mul(x = block_6_ffn_rmsnorm_scaled, y = block_6_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_6_ffn_rmsnorm_dim_scaled")]; tensor block_6_ffn_rmsnorm_normalized = mul(x = block_6_ffn_rmsnorm_dim_scaled, y = block_6_ffn_rmsnorm_rsqrt)[name = string("block_6_ffn_rmsnorm_normalized")]; tensor block_6_ffn_rmsnorm_y_0 = const()[name = string("block_6_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362300608)))]; tensor block_6_ffn_rmsnorm = mul(x = block_6_ffn_rmsnorm_normalized, y = block_6_ffn_rmsnorm_y_0)[name = string("block_6_ffn_rmsnorm")]; tensor block_6_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362302464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365571136))))[name = string("block_6_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_27 = constexpr_blockwise_shift_scale(data = block_6_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365726848))))[name = string("constexpr_blockwise_shift_scale_27")]; tensor block_6_ffn_inproj_strides_0 = const()[name = string("block_6_ffn_inproj_strides_0"), val = tensor([1])]; string block_6_ffn_inproj_pad_type_0 = const()[name = string("block_6_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_6_ffn_inproj_pad_0 = const()[name = string("block_6_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_6_ffn_inproj_dilations_0 = const()[name = string("block_6_ffn_inproj_dilations_0"), val = 
tensor([1])]; int32 block_6_ffn_inproj_groups_0 = const()[name = string("block_6_ffn_inproj_groups_0"), val = int32(1)]; tensor block_6_ffn_inproj = conv(dilations = block_6_ffn_inproj_dilations_0, groups = block_6_ffn_inproj_groups_0, pad = block_6_ffn_inproj_pad_0, pad_type = block_6_ffn_inproj_pad_type_0, strides = block_6_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_27, x = block_6_ffn_rmsnorm)[name = string("block_6_ffn_inproj")]; tensor block_6_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(365736640))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369005312))))[name = string("block_6_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_28 = constexpr_blockwise_shift_scale(data = block_6_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369161024))))[name = string("constexpr_blockwise_shift_scale_28")]; tensor block_6_ffn_g_strides_0 = const()[name = string("block_6_ffn_g_strides_0"), val = tensor([1])]; string block_6_ffn_g_pad_type_0 = const()[name = string("block_6_ffn_g_pad_type_0"), val = string("valid")]; tensor block_6_ffn_g_pad_0 = const()[name = string("block_6_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_6_ffn_g_dilations_0 = const()[name = string("block_6_ffn_g_dilations_0"), val = tensor([1])]; int32 block_6_ffn_g_groups_0 = const()[name = string("block_6_ffn_g_groups_0"), val = int32(1)]; tensor block_6_ffn_g = conv(dilations = block_6_ffn_g_dilations_0, groups = block_6_ffn_g_groups_0, pad = block_6_ffn_g_pad_0, pad_type = block_6_ffn_g_pad_type_0, strides = block_6_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_28, x = block_6_ffn_rmsnorm)[name = string("block_6_ffn_g")]; tensor block_6_ffn_g_activation = silu(x = block_6_ffn_g)[name = string("block_6_ffn_g_activation")]; tensor block_6_ffn_x_gated = mul(x = block_6_ffn_inproj, y = block_6_ffn_g_activation)[name = string("block_6_ffn_x_gated")]; tensor block_6_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(369170816))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372439488))))[name = string("block_6_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_29 = constexpr_blockwise_shift_scale(data = block_6_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372468224))))[name = string("constexpr_blockwise_shift_scale_29")]; tensor block_6_ffn_outproj_strides_0 = const()[name = string("block_6_ffn_outproj_strides_0"), val = tensor([1])]; string block_6_ffn_outproj_pad_type_0 = const()[name = string("block_6_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_6_ffn_outproj_pad_0 = const()[name = string("block_6_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_6_ffn_outproj_dilations_0 = const()[name = string("block_6_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_6_ffn_outproj_groups_0 = const()[name = string("block_6_ffn_outproj_groups_0"), val = int32(1)]; tensor block_6_ffn_outproj = conv(dilations = block_6_ffn_outproj_dilations_0, groups = block_6_ffn_outproj_groups_0, pad = block_6_ffn_outproj_pad_0, pad_type = block_6_ffn_outproj_pad_type_0, strides = block_6_ffn_outproj_strides_0, weight = 
constexpr_blockwise_shift_scale_29, x = block_6_ffn_x_gated)[name = string("block_6_ffn_outproj")]; tensor block_6_residual_2 = add(x = block_6_ffn_outproj, y = block_6_residual_1)[name = string("block_6_residual_2")]; tensor block_7_attention_rmsnorm_abs = abs(x = block_6_residual_2)[name = string("block_7_attention_rmsnorm_abs")]; tensor block_7_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_7_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_7_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_7_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_7_attention_rmsnorm_maxval = reduce_max(axes = block_7_attention_rmsnorm_maxval_axes_0, keep_dims = block_7_attention_rmsnorm_maxval_keep_dims_0, x = block_7_attention_rmsnorm_abs)[name = string("block_7_attention_rmsnorm_maxval")]; fp16 block_7_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_7_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_7_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_7_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_7_attention_rmsnorm_maxval_clipped = clip(alpha = block_7_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_7_attention_rmsnorm_maxval_clipped_beta_0, x = block_7_attention_rmsnorm_maxval)[name = string("block_7_attention_rmsnorm_maxval_clipped")]; tensor block_7_attention_rmsnorm_scaled = real_div(x = block_6_residual_2, y = block_7_attention_rmsnorm_maxval_clipped)[name = string("block_7_attention_rmsnorm_scaled")]; tensor block_7_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_7_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_7_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_7_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_7_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_7_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_7_attention_rmsnorm_squared_sum_keep_dims_0, x = block_7_attention_rmsnorm_scaled)[name = string("block_7_attention_rmsnorm_squared_sum")]; fp16 block_7_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_7_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_7_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_7_attention_rmsnorm_rsqrt_epsilon_0, x = block_7_attention_rmsnorm_squared_sum)[name = string("block_7_attention_rmsnorm_rsqrt")]; fp16 block_7_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_7_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_7_attention_rmsnorm_dim_scaled = mul(x = block_7_attention_rmsnorm_scaled, y = block_7_attention_rmsnorm_dim_scaled_y_0)[name = string("block_7_attention_rmsnorm_dim_scaled")]; tensor block_7_attention_rmsnorm_normalized = mul(x = block_7_attention_rmsnorm_dim_scaled, y = block_7_attention_rmsnorm_rsqrt)[name = string("block_7_attention_rmsnorm_normalized")]; tensor block_7_attention_rmsnorm_y_0 = const()[name = string("block_7_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372470080)))]; tensor block_7_attention_rmsnorm = mul(x = block_7_attention_rmsnorm_normalized, y = block_7_attention_rmsnorm_y_0)[name = string("block_7_attention_rmsnorm")]; tensor attention_7_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372471936))), 
lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373246144))))[name = string("attention_7_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_30 = constexpr_blockwise_shift_scale(data = attention_7_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373283072))))[name = string("constexpr_blockwise_shift_scale_30")]; tensor attention_7_qkvproj_bias_0 = const()[name = string("attention_7_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373285440)))]; tensor attention_7_qkvproj_strides_0 = const()[name = string("attention_7_qkvproj_strides_0"), val = tensor([1])]; string attention_7_qkvproj_pad_type_0 = const()[name = string("attention_7_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_7_qkvproj_pad_0 = const()[name = string("attention_7_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_7_qkvproj_dilations_0 = const()[name = string("attention_7_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_7_qkvproj_groups_0 = const()[name = string("attention_7_qkvproj_groups_0"), val = int32(1)]; tensor attention_7_qkvproj = conv(bias = attention_7_qkvproj_bias_0, dilations = attention_7_qkvproj_dilations_0, groups = attention_7_qkvproj_groups_0, pad = attention_7_qkvproj_pad_0, pad_type = attention_7_qkvproj_pad_type_0, strides = attention_7_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_30, x = block_7_attention_rmsnorm)[name = string("attention_7_qkvproj")]; tensor attention_7_head_reshape_shape_0 = const()[name = string("attention_7_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_7_head_reshape = reshape(shape = attention_7_head_reshape_shape_0, x = attention_7_qkvproj)[name = string("attention_7_head_reshape")]; tensor attention_7_head_transpose_perm_0 = const()[name = string("attention_7_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_7_split_qkv_heads_axis_0 = const()[name = string("attention_7_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_7_split_qkv_heads_split_sizes_0 = const()[name = string("attention_7_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_7_head_transpose = transpose(perm = attention_7_head_transpose_perm_0, x = attention_7_head_reshape)[name = string("transpose_34")]; tensor attention_7_split_qkv_heads_0, tensor attention_7_split_qkv_heads_1, tensor attention_7_split_qkv_heads_2 = split(axis = attention_7_split_qkv_heads_axis_0, split_sizes = attention_7_split_qkv_heads_split_sizes_0, x = attention_7_head_transpose)[name = string("attention_7_split_qkv_heads")]; tensor attention_7_q_rope_lhs_mult = mul(x = attention_7_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_7_q_rope_lhs_mult")]; int32 attention_7_q_rotate_half_split_num_splits_0 = const()[name = string("attention_7_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_7_q_rotate_half_split_axis_0 = const()[name = string("attention_7_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_7_q_rotate_half_split_0, tensor attention_7_q_rotate_half_split_1 = split(axis = attention_7_q_rotate_half_split_axis_0, num_splits = attention_7_q_rotate_half_split_num_splits_0, x = attention_7_split_qkv_heads_0)[name = string("attention_7_q_rotate_half_split")]; fp16 attention_7_q_rotate_half_neg_y_0 = const()[name = string("attention_7_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor 
attention_7_q_rotate_half_neg = mul(x = attention_7_q_rotate_half_split_1, y = attention_7_q_rotate_half_neg_y_0)[name = string("attention_7_q_rotate_half_neg")]; int32 attention_7_q_rotate_half_concat_axis_0 = const()[name = string("attention_7_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_7_q_rotate_half_concat_interleave_0 = const()[name = string("attention_7_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_7_q_rotate_half_concat = concat(axis = attention_7_q_rotate_half_concat_axis_0, interleave = attention_7_q_rotate_half_concat_interleave_0, values = (attention_7_q_rotate_half_neg, attention_7_q_rotate_half_split_0))[name = string("attention_7_q_rotate_half_concat")]; tensor attention_7_q_rope_rhs_mult = mul(x = attention_7_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_7_q_rope_rhs_mult")]; tensor attention_7_q_rope = add(x = attention_7_q_rope_lhs_mult, y = attention_7_q_rope_rhs_mult)[name = string("attention_7_q_rope")]; tensor attention_7_k_rope_lhs_mult = mul(x = attention_7_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_7_k_rope_lhs_mult")]; int32 attention_7_k_rotate_half_split_num_splits_0 = const()[name = string("attention_7_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_7_k_rotate_half_split_axis_0 = const()[name = string("attention_7_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_7_k_rotate_half_split_0, tensor attention_7_k_rotate_half_split_1 = split(axis = attention_7_k_rotate_half_split_axis_0, num_splits = attention_7_k_rotate_half_split_num_splits_0, x = attention_7_split_qkv_heads_1)[name = string("attention_7_k_rotate_half_split")]; fp16 attention_7_k_rotate_half_neg_y_0 = const()[name = string("attention_7_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_7_k_rotate_half_neg = mul(x = attention_7_k_rotate_half_split_1, y = attention_7_k_rotate_half_neg_y_0)[name = string("attention_7_k_rotate_half_neg")]; int32 attention_7_k_rotate_half_concat_axis_0 = const()[name = string("attention_7_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_7_k_rotate_half_concat_interleave_0 = const()[name = string("attention_7_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_7_k_rotate_half_concat = concat(axis = attention_7_k_rotate_half_concat_axis_0, interleave = attention_7_k_rotate_half_concat_interleave_0, values = (attention_7_k_rotate_half_neg, attention_7_k_rotate_half_split_0))[name = string("attention_7_k_rotate_half_concat")]; tensor attention_7_k_rope_rhs_mult = mul(x = attention_7_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_7_k_rope_rhs_mult")]; tensor attention_7_k_rope = add(x = attention_7_k_rope_lhs_mult, y = attention_7_k_rope_rhs_mult)[name = string("attention_7_k_rope")]; int32 attention_7_q_splits_axis_0 = const()[name = string("attention_7_q_splits_axis_0"), val = int32(1)]; int32 attention_7_q_splits_num_splits_0 = const()[name = string("attention_7_q_splits_num_splits_0"), val = int32(2)]; tensor attention_7_q_splits_0, tensor attention_7_q_splits_1 = split(axis = attention_7_q_splits_axis_0, num_splits = attention_7_q_splits_num_splits_0, x = attention_7_q_rope)[name = string("attention_7_q_splits")]; tensor attention_7_update_begin_0_values0_0 = const()[name = string("attention_7_update_begin_0_values0_0"), val = tensor([7])]; tensor attention_7_update_begin_0_values1_0 = const()[name = string("attention_7_update_begin_0_values1_0"), val = tensor([0])]; tensor 
attention_7_update_begin_0_values3_0 = const()[name = string("attention_7_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_7_update_begin_0_axis_0 = const()[name = string("attention_7_update_begin_0_axis_0"), val = int32(0)]; bool attention_7_update_begin_0_interleave_0 = const()[name = string("attention_7_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_7_update_begin_0 = concat(axis = attention_7_update_begin_0_axis_0, interleave = attention_7_update_begin_0_interleave_0, values = (attention_7_update_begin_0_values0_0, attention_7_update_begin_0_values1_0, query_pos1, attention_7_update_begin_0_values3_0))[name = string("attention_7_update_begin_0")]; tensor attention_7_update_end_0_values0_0 = const()[name = string("attention_7_update_end_0_values0_0"), val = tensor([8])]; tensor attention_7_update_end_0_values1_0 = const()[name = string("attention_7_update_end_0_values1_0"), val = tensor([2])]; tensor attention_7_update_end_0_values3_0 = const()[name = string("attention_7_update_end_0_values3_0"), val = tensor([64])]; int32 attention_7_update_end_0_axis_0 = const()[name = string("attention_7_update_end_0_axis_0"), val = int32(0)]; bool attention_7_update_end_0_interleave_0 = const()[name = string("attention_7_update_end_0_interleave_0"), val = bool(false)]; tensor attention_7_update_end_0 = concat(axis = attention_7_update_end_0_axis_0, interleave = attention_7_update_end_0_interleave_0, values = (attention_7_update_end_0_values0_0, attention_7_update_end_0_values1_0, end_pos_0, attention_7_update_end_0_values3_0))[name = string("attention_7_update_end_0")]; tensor attention_7_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_7_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_7_updated_key_cache_0 = slice_update(begin = attention_7_update_begin_0, end = attention_7_update_end_0, squeeze_mask = attention_7_updated_key_cache_0_squeeze_mask_0, update = attention_7_k_rope, x = coreml_update_state_12)[name = string("attention_7_updated_key_cache_0")]; write_state(data = attention_7_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_62_write_state")]; tensor coreml_update_state_14 = read_state(input = key_cache_state)[name = string("coreml_update_state_62")]; tensor attention_7_key_cache_begin_0 = const()[name = string("attention_7_key_cache_begin_0"), val = tensor([7, 0, 0, 0])]; tensor attention_7_key_cache_end_0 = const()[name = string("attention_7_key_cache_end_0"), val = tensor([8, 2, 512, 64])]; tensor attention_7_key_cache_squeeze_mask_0 = const()[name = string("attention_7_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_7_key_cache = slice_by_index(begin = attention_7_key_cache_begin_0, end = attention_7_key_cache_end_0, squeeze_mask = attention_7_key_cache_squeeze_mask_0, x = coreml_update_state_14)[name = string("attention_7_key_cache")]; int32 attention_7_key_cache_head_axis_0 = const()[name = string("attention_7_key_cache_head_axis_0"), val = int32(1)]; int32 attention_7_key_cache_head_num_splits_0 = const()[name = string("attention_7_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_7_key_cache_head_0, tensor attention_7_key_cache_head_1 = split(axis = attention_7_key_cache_head_axis_0, num_splits = attention_7_key_cache_head_num_splits_0, x = attention_7_key_cache)[name = string("attention_7_key_cache_head")]; tensor attention_7_updated_value_cache_0_squeeze_mask_0 = const()[name = 
string("attention_7_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_7_updated_value_cache_0 = slice_update(begin = attention_7_update_begin_0, end = attention_7_update_end_0, squeeze_mask = attention_7_updated_value_cache_0_squeeze_mask_0, update = attention_7_split_qkv_heads_2, x = coreml_update_state_13)[name = string("attention_7_updated_value_cache_0")]; write_state(data = attention_7_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_63_write_state")]; tensor coreml_update_state_15 = read_state(input = value_cache_state)[name = string("coreml_update_state_63")]; tensor attention_7_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_7_slice_current_layer_value_cache_begin_0"), val = tensor([7, 0, 0, 0])]; tensor attention_7_slice_current_layer_value_cache_end_0 = const()[name = string("attention_7_slice_current_layer_value_cache_end_0"), val = tensor([8, 2, 512, 64])]; tensor attention_7_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_7_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_7_slice_current_layer_value_cache = slice_by_index(begin = attention_7_slice_current_layer_value_cache_begin_0, end = attention_7_slice_current_layer_value_cache_end_0, squeeze_mask = attention_7_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_15)[name = string("attention_7_slice_current_layer_value_cache")]; int32 attention_7_slice_value_cache_heads_axis_0 = const()[name = string("attention_7_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_7_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_7_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_7_slice_value_cache_heads_0, tensor attention_7_slice_value_cache_heads_1 = split(axis = attention_7_slice_value_cache_heads_axis_0, num_splits = attention_7_slice_value_cache_heads_num_splits_0, x = attention_7_slice_current_layer_value_cache)[name = string("attention_7_slice_value_cache_heads")]; bool attention_7_scores_0_transpose_y_0 = const()[name = string("attention_7_scores_0_transpose_y_0"), val = bool(true)]; bool attention_7_scores_0_transpose_x_0 = const()[name = string("attention_7_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_7_scores_0 = matmul(transpose_x = attention_7_scores_0_transpose_x_0, transpose_y = attention_7_scores_0_transpose_y_0, x = attention_7_key_cache_head_0, y = attention_7_q_splits_0)[name = string("attention_7_scores_0")]; fp16 attention_7_scaled_scores_0_y_0 = const()[name = string("attention_7_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_7_scaled_scores_0 = mul(x = attention_7_scores_0, y = attention_7_scaled_scores_0_y_0)[name = string("attention_7_scaled_scores_0")]; tensor attention_7_masked_scaled_scores_0 = add(x = attention_7_scaled_scores_0, y = transpose_0)[name = string("attention_7_masked_scaled_scores_0")]; int32 softmax_14_axis_0 = const()[name = string("softmax_14_axis_0"), val = int32(-2)]; tensor softmax_14 = softmax(axis = softmax_14_axis_0, x = attention_7_masked_scaled_scores_0)[name = string("softmax_14")]; bool attention_7_attention_0_transpose_x_0 = const()[name = string("attention_7_attention_0_transpose_x_0"), val = bool(true)]; bool attention_7_attention_0_transpose_y_0 = const()[name = string("attention_7_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_7_attention_0 
= matmul(transpose_x = attention_7_attention_0_transpose_x_0, transpose_y = attention_7_attention_0_transpose_y_0, x = softmax_14, y = attention_7_slice_value_cache_heads_0)[name = string("attention_7_attention_0")]; bool attention_7_scores_1_transpose_y_0 = const()[name = string("attention_7_scores_1_transpose_y_0"), val = bool(true)]; bool attention_7_scores_1_transpose_x_0 = const()[name = string("attention_7_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_7_scores_1 = matmul(transpose_x = attention_7_scores_1_transpose_x_0, transpose_y = attention_7_scores_1_transpose_y_0, x = attention_7_key_cache_head_1, y = attention_7_q_splits_1)[name = string("attention_7_scores_1")]; fp16 attention_7_scaled_scores_1_y_0 = const()[name = string("attention_7_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_7_scaled_scores_1 = mul(x = attention_7_scores_1, y = attention_7_scaled_scores_1_y_0)[name = string("attention_7_scaled_scores_1")]; tensor attention_7_masked_scaled_scores_1 = add(x = attention_7_scaled_scores_1, y = transpose_0)[name = string("attention_7_masked_scaled_scores_1")]; int32 softmax_15_axis_0 = const()[name = string("softmax_15_axis_0"), val = int32(-2)]; tensor softmax_15 = softmax(axis = softmax_15_axis_0, x = attention_7_masked_scaled_scores_1)[name = string("softmax_15")]; bool attention_7_attention_1_transpose_x_0 = const()[name = string("attention_7_attention_1_transpose_x_0"), val = bool(true)]; bool attention_7_attention_1_transpose_y_0 = const()[name = string("attention_7_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_7_attention_1 = matmul(transpose_x = attention_7_attention_1_transpose_x_0, transpose_y = attention_7_attention_1_transpose_y_0, x = softmax_15, y = attention_7_slice_value_cache_heads_1)[name = string("attention_7_attention_1")]; int32 attention_7_concat_attention_all_heads_axis_0 = const()[name = string("attention_7_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_7_concat_attention_all_heads_interleave_0 = const()[name = string("attention_7_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_7_concat_attention_all_heads = concat(axis = attention_7_concat_attention_all_heads_axis_0, interleave = attention_7_concat_attention_all_heads_interleave_0, values = (attention_7_attention_0, attention_7_attention_1))[name = string("attention_7_concat_attention_all_heads")]; tensor attention_7_channels_first_retransposed_perm_0 = const()[name = string("attention_7_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_7_reshaped_shape_0 = const()[name = string("attention_7_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_7_channels_first_retransposed = transpose(perm = attention_7_channels_first_retransposed_perm_0, x = attention_7_concat_attention_all_heads)[name = string("transpose_33")]; tensor attention_7_reshaped = reshape(shape = attention_7_reshaped_shape_0, x = attention_7_channels_first_retransposed)[name = string("attention_7_reshaped")]; tensor attention_7_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373287808))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373889984))))[name = string("attention_7_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_31 = constexpr_blockwise_shift_scale(data = attention_7_outproj_weight_dequantization, scale = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373918720))))[name = string("constexpr_blockwise_shift_scale_31")]; tensor attention_7_outproj_strides_0 = const()[name = string("attention_7_outproj_strides_0"), val = tensor([1])]; string attention_7_outproj_pad_type_0 = const()[name = string("attention_7_outproj_pad_type_0"), val = string("valid")]; tensor attention_7_outproj_pad_0 = const()[name = string("attention_7_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_7_outproj_dilations_0 = const()[name = string("attention_7_outproj_dilations_0"), val = tensor([1])]; int32 attention_7_outproj_groups_0 = const()[name = string("attention_7_outproj_groups_0"), val = int32(1)]; tensor attention_7_outproj = conv(dilations = attention_7_outproj_dilations_0, groups = attention_7_outproj_groups_0, pad = attention_7_outproj_pad_0, pad_type = attention_7_outproj_pad_type_0, strides = attention_7_outproj_strides_0, weight = constexpr_blockwise_shift_scale_31, x = attention_7_reshaped)[name = string("attention_7_outproj")]; tensor block_7_residual_1 = add(x = block_6_residual_2, y = attention_7_outproj)[name = string("block_7_residual_1")]; tensor block_7_ffn_rmsnorm_abs = abs(x = block_7_residual_1)[name = string("block_7_ffn_rmsnorm_abs")]; tensor block_7_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_7_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_7_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_7_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_7_ffn_rmsnorm_maxval = reduce_max(axes = block_7_ffn_rmsnorm_maxval_axes_0, keep_dims = block_7_ffn_rmsnorm_maxval_keep_dims_0, x = block_7_ffn_rmsnorm_abs)[name = string("block_7_ffn_rmsnorm_maxval")]; fp16 block_7_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_7_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_7_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_7_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_7_ffn_rmsnorm_maxval_clipped = clip(alpha = block_7_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_7_ffn_rmsnorm_maxval_clipped_beta_0, x = block_7_ffn_rmsnorm_maxval)[name = string("block_7_ffn_rmsnorm_maxval_clipped")]; tensor block_7_ffn_rmsnorm_scaled = real_div(x = block_7_residual_1, y = block_7_ffn_rmsnorm_maxval_clipped)[name = string("block_7_ffn_rmsnorm_scaled")]; tensor block_7_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_7_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_7_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_7_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_7_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_7_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_7_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_7_ffn_rmsnorm_scaled)[name = string("block_7_ffn_rmsnorm_squared_sum")]; fp16 block_7_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_7_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_7_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_7_ffn_rmsnorm_rsqrt_epsilon_0, x = block_7_ffn_rmsnorm_squared_sum)[name = string("block_7_ffn_rmsnorm_rsqrt")]; fp16 block_7_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_7_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_7_ffn_rmsnorm_dim_scaled = mul(x = block_7_ffn_rmsnorm_scaled, y = block_7_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_7_ffn_rmsnorm_dim_scaled")]; tensor 
block_7_ffn_rmsnorm_normalized = mul(x = block_7_ffn_rmsnorm_dim_scaled, y = block_7_ffn_rmsnorm_rsqrt)[name = string("block_7_ffn_rmsnorm_normalized")]; tensor block_7_ffn_rmsnorm_y_0 = const()[name = string("block_7_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373920576)))]; tensor block_7_ffn_rmsnorm = mul(x = block_7_ffn_rmsnorm_normalized, y = block_7_ffn_rmsnorm_y_0)[name = string("block_7_ffn_rmsnorm")]; tensor block_7_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373922432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377191104))))[name = string("block_7_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_32 = constexpr_blockwise_shift_scale(data = block_7_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377346816))))[name = string("constexpr_blockwise_shift_scale_32")]; tensor block_7_ffn_inproj_strides_0 = const()[name = string("block_7_ffn_inproj_strides_0"), val = tensor([1])]; string block_7_ffn_inproj_pad_type_0 = const()[name = string("block_7_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_7_ffn_inproj_pad_0 = const()[name = string("block_7_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_7_ffn_inproj_dilations_0 = const()[name = string("block_7_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_7_ffn_inproj_groups_0 = const()[name = string("block_7_ffn_inproj_groups_0"), val = int32(1)]; tensor block_7_ffn_inproj = conv(dilations = block_7_ffn_inproj_dilations_0, groups = block_7_ffn_inproj_groups_0, pad = block_7_ffn_inproj_pad_0, pad_type = block_7_ffn_inproj_pad_type_0, strides = block_7_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_32, x = block_7_ffn_rmsnorm)[name = string("block_7_ffn_inproj")]; tensor block_7_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377356608))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380625280))))[name = string("block_7_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_33 = constexpr_blockwise_shift_scale(data = block_7_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380780992))))[name = string("constexpr_blockwise_shift_scale_33")]; tensor block_7_ffn_g_strides_0 = const()[name = string("block_7_ffn_g_strides_0"), val = tensor([1])]; string block_7_ffn_g_pad_type_0 = const()[name = string("block_7_ffn_g_pad_type_0"), val = string("valid")]; tensor block_7_ffn_g_pad_0 = const()[name = string("block_7_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_7_ffn_g_dilations_0 = const()[name = string("block_7_ffn_g_dilations_0"), val = tensor([1])]; int32 block_7_ffn_g_groups_0 = const()[name = string("block_7_ffn_g_groups_0"), val = int32(1)]; tensor block_7_ffn_g = conv(dilations = block_7_ffn_g_dilations_0, groups = block_7_ffn_g_groups_0, pad = block_7_ffn_g_pad_0, pad_type = block_7_ffn_g_pad_type_0, strides = block_7_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_33, x = block_7_ffn_rmsnorm)[name = string("block_7_ffn_g")]; tensor block_7_ffn_g_activation = silu(x = block_7_ffn_g)[name = string("block_7_ffn_g_activation")]; tensor block_7_ffn_x_gated = 
mul(x = block_7_ffn_inproj, y = block_7_ffn_g_activation)[name = string("block_7_ffn_x_gated")]; tensor block_7_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380790784))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384059456))))[name = string("block_7_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_34 = constexpr_blockwise_shift_scale(data = block_7_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384088192))))[name = string("constexpr_blockwise_shift_scale_34")]; tensor block_7_ffn_outproj_strides_0 = const()[name = string("block_7_ffn_outproj_strides_0"), val = tensor([1])]; string block_7_ffn_outproj_pad_type_0 = const()[name = string("block_7_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_7_ffn_outproj_pad_0 = const()[name = string("block_7_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_7_ffn_outproj_dilations_0 = const()[name = string("block_7_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_7_ffn_outproj_groups_0 = const()[name = string("block_7_ffn_outproj_groups_0"), val = int32(1)]; tensor block_7_ffn_outproj = conv(dilations = block_7_ffn_outproj_dilations_0, groups = block_7_ffn_outproj_groups_0, pad = block_7_ffn_outproj_pad_0, pad_type = block_7_ffn_outproj_pad_type_0, strides = block_7_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_34, x = block_7_ffn_x_gated)[name = string("block_7_ffn_outproj")]; tensor block_7_residual_2 = add(x = block_7_ffn_outproj, y = block_7_residual_1)[name = string("block_7_residual_2")]; tensor block_8_attention_rmsnorm_abs = abs(x = block_7_residual_2)[name = string("block_8_attention_rmsnorm_abs")]; tensor block_8_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_8_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_8_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_8_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_8_attention_rmsnorm_maxval = reduce_max(axes = block_8_attention_rmsnorm_maxval_axes_0, keep_dims = block_8_attention_rmsnorm_maxval_keep_dims_0, x = block_8_attention_rmsnorm_abs)[name = string("block_8_attention_rmsnorm_maxval")]; fp16 block_8_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_8_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_8_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_8_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_8_attention_rmsnorm_maxval_clipped = clip(alpha = block_8_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_8_attention_rmsnorm_maxval_clipped_beta_0, x = block_8_attention_rmsnorm_maxval)[name = string("block_8_attention_rmsnorm_maxval_clipped")]; tensor block_8_attention_rmsnorm_scaled = real_div(x = block_7_residual_2, y = block_8_attention_rmsnorm_maxval_clipped)[name = string("block_8_attention_rmsnorm_scaled")]; tensor block_8_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_8_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_8_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_8_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_8_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_8_attention_rmsnorm_squared_sum_axes_0, 
keep_dims = block_8_attention_rmsnorm_squared_sum_keep_dims_0, x = block_8_attention_rmsnorm_scaled)[name = string("block_8_attention_rmsnorm_squared_sum")]; fp16 block_8_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_8_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_8_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_8_attention_rmsnorm_rsqrt_epsilon_0, x = block_8_attention_rmsnorm_squared_sum)[name = string("block_8_attention_rmsnorm_rsqrt")]; fp16 block_8_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_8_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_8_attention_rmsnorm_dim_scaled = mul(x = block_8_attention_rmsnorm_scaled, y = block_8_attention_rmsnorm_dim_scaled_y_0)[name = string("block_8_attention_rmsnorm_dim_scaled")]; tensor block_8_attention_rmsnorm_normalized = mul(x = block_8_attention_rmsnorm_dim_scaled, y = block_8_attention_rmsnorm_rsqrt)[name = string("block_8_attention_rmsnorm_normalized")]; tensor block_8_attention_rmsnorm_y_0 = const()[name = string("block_8_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384090048)))]; tensor block_8_attention_rmsnorm = mul(x = block_8_attention_rmsnorm_normalized, y = block_8_attention_rmsnorm_y_0)[name = string("block_8_attention_rmsnorm")]; tensor attention_8_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384091904))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384866112))))[name = string("attention_8_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_35 = constexpr_blockwise_shift_scale(data = attention_8_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384903040))))[name = string("constexpr_blockwise_shift_scale_35")]; tensor attention_8_qkvproj_bias_0 = const()[name = string("attention_8_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384905408)))]; tensor attention_8_qkvproj_strides_0 = const()[name = string("attention_8_qkvproj_strides_0"), val = tensor([1])]; string attention_8_qkvproj_pad_type_0 = const()[name = string("attention_8_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_8_qkvproj_pad_0 = const()[name = string("attention_8_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_8_qkvproj_dilations_0 = const()[name = string("attention_8_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_8_qkvproj_groups_0 = const()[name = string("attention_8_qkvproj_groups_0"), val = int32(1)]; tensor attention_8_qkvproj = conv(bias = attention_8_qkvproj_bias_0, dilations = attention_8_qkvproj_dilations_0, groups = attention_8_qkvproj_groups_0, pad = attention_8_qkvproj_pad_0, pad_type = attention_8_qkvproj_pad_type_0, strides = attention_8_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_35, x = block_8_attention_rmsnorm)[name = string("attention_8_qkvproj")]; tensor attention_8_head_reshape_shape_0 = const()[name = string("attention_8_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_8_head_reshape = reshape(shape = attention_8_head_reshape_shape_0, x = attention_8_qkvproj)[name = string("attention_8_head_reshape")]; tensor attention_8_head_transpose_perm_0 = const()[name = string("attention_8_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; 
int32 attention_8_split_qkv_heads_axis_0 = const()[name = string("attention_8_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_8_split_qkv_heads_split_sizes_0 = const()[name = string("attention_8_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_8_head_transpose = transpose(perm = attention_8_head_transpose_perm_0, x = attention_8_head_reshape)[name = string("transpose_32")]; tensor attention_8_split_qkv_heads_0, tensor attention_8_split_qkv_heads_1, tensor attention_8_split_qkv_heads_2 = split(axis = attention_8_split_qkv_heads_axis_0, split_sizes = attention_8_split_qkv_heads_split_sizes_0, x = attention_8_head_transpose)[name = string("attention_8_split_qkv_heads")]; tensor attention_8_q_rope_lhs_mult = mul(x = attention_8_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_8_q_rope_lhs_mult")]; int32 attention_8_q_rotate_half_split_num_splits_0 = const()[name = string("attention_8_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_8_q_rotate_half_split_axis_0 = const()[name = string("attention_8_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_8_q_rotate_half_split_0, tensor attention_8_q_rotate_half_split_1 = split(axis = attention_8_q_rotate_half_split_axis_0, num_splits = attention_8_q_rotate_half_split_num_splits_0, x = attention_8_split_qkv_heads_0)[name = string("attention_8_q_rotate_half_split")]; fp16 attention_8_q_rotate_half_neg_y_0 = const()[name = string("attention_8_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_8_q_rotate_half_neg = mul(x = attention_8_q_rotate_half_split_1, y = attention_8_q_rotate_half_neg_y_0)[name = string("attention_8_q_rotate_half_neg")]; int32 attention_8_q_rotate_half_concat_axis_0 = const()[name = string("attention_8_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_8_q_rotate_half_concat_interleave_0 = const()[name = string("attention_8_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_8_q_rotate_half_concat = concat(axis = attention_8_q_rotate_half_concat_axis_0, interleave = attention_8_q_rotate_half_concat_interleave_0, values = (attention_8_q_rotate_half_neg, attention_8_q_rotate_half_split_0))[name = string("attention_8_q_rotate_half_concat")]; tensor attention_8_q_rope_rhs_mult = mul(x = attention_8_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_8_q_rope_rhs_mult")]; tensor attention_8_q_rope = add(x = attention_8_q_rope_lhs_mult, y = attention_8_q_rope_rhs_mult)[name = string("attention_8_q_rope")]; tensor attention_8_k_rope_lhs_mult = mul(x = attention_8_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_8_k_rope_lhs_mult")]; int32 attention_8_k_rotate_half_split_num_splits_0 = const()[name = string("attention_8_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_8_k_rotate_half_split_axis_0 = const()[name = string("attention_8_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_8_k_rotate_half_split_0, tensor attention_8_k_rotate_half_split_1 = split(axis = attention_8_k_rotate_half_split_axis_0, num_splits = attention_8_k_rotate_half_split_num_splits_0, x = attention_8_split_qkv_heads_1)[name = string("attention_8_k_rotate_half_split")]; fp16 attention_8_k_rotate_half_neg_y_0 = const()[name = string("attention_8_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_8_k_rotate_half_neg = mul(x = attention_8_k_rotate_half_split_1, y = attention_8_k_rotate_half_neg_y_0)[name = string("attention_8_k_rotate_half_neg")]; int32 
attention_8_k_rotate_half_concat_axis_0 = const()[name = string("attention_8_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_8_k_rotate_half_concat_interleave_0 = const()[name = string("attention_8_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_8_k_rotate_half_concat = concat(axis = attention_8_k_rotate_half_concat_axis_0, interleave = attention_8_k_rotate_half_concat_interleave_0, values = (attention_8_k_rotate_half_neg, attention_8_k_rotate_half_split_0))[name = string("attention_8_k_rotate_half_concat")]; tensor attention_8_k_rope_rhs_mult = mul(x = attention_8_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_8_k_rope_rhs_mult")]; tensor attention_8_k_rope = add(x = attention_8_k_rope_lhs_mult, y = attention_8_k_rope_rhs_mult)[name = string("attention_8_k_rope")]; int32 attention_8_q_splits_axis_0 = const()[name = string("attention_8_q_splits_axis_0"), val = int32(1)]; int32 attention_8_q_splits_num_splits_0 = const()[name = string("attention_8_q_splits_num_splits_0"), val = int32(2)]; tensor attention_8_q_splits_0, tensor attention_8_q_splits_1 = split(axis = attention_8_q_splits_axis_0, num_splits = attention_8_q_splits_num_splits_0, x = attention_8_q_rope)[name = string("attention_8_q_splits")]; tensor attention_8_update_begin_0_values0_0 = const()[name = string("attention_8_update_begin_0_values0_0"), val = tensor([8])]; tensor attention_8_update_begin_0_values1_0 = const()[name = string("attention_8_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_8_update_begin_0_values3_0 = const()[name = string("attention_8_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_8_update_begin_0_axis_0 = const()[name = string("attention_8_update_begin_0_axis_0"), val = int32(0)]; bool attention_8_update_begin_0_interleave_0 = const()[name = string("attention_8_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_8_update_begin_0 = concat(axis = attention_8_update_begin_0_axis_0, interleave = attention_8_update_begin_0_interleave_0, values = (attention_8_update_begin_0_values0_0, attention_8_update_begin_0_values1_0, query_pos1, attention_8_update_begin_0_values3_0))[name = string("attention_8_update_begin_0")]; tensor attention_8_update_end_0_values0_0 = const()[name = string("attention_8_update_end_0_values0_0"), val = tensor([9])]; tensor attention_8_update_end_0_values1_0 = const()[name = string("attention_8_update_end_0_values1_0"), val = tensor([2])]; tensor attention_8_update_end_0_values3_0 = const()[name = string("attention_8_update_end_0_values3_0"), val = tensor([64])]; int32 attention_8_update_end_0_axis_0 = const()[name = string("attention_8_update_end_0_axis_0"), val = int32(0)]; bool attention_8_update_end_0_interleave_0 = const()[name = string("attention_8_update_end_0_interleave_0"), val = bool(false)]; tensor attention_8_update_end_0 = concat(axis = attention_8_update_end_0_axis_0, interleave = attention_8_update_end_0_interleave_0, values = (attention_8_update_end_0_values0_0, attention_8_update_end_0_values1_0, end_pos_0, attention_8_update_end_0_values3_0))[name = string("attention_8_update_end_0")]; tensor attention_8_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_8_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_8_updated_key_cache_0 = slice_update(begin = attention_8_update_begin_0, end = attention_8_update_end_0, squeeze_mask = attention_8_updated_key_cache_0_squeeze_mask_0, update = 
attention_8_k_rope, x = coreml_update_state_14)[name = string("attention_8_updated_key_cache_0")]; write_state(data = attention_8_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_64_write_state")]; tensor coreml_update_state_16 = read_state(input = key_cache_state)[name = string("coreml_update_state_64")]; tensor attention_8_key_cache_begin_0 = const()[name = string("attention_8_key_cache_begin_0"), val = tensor([8, 0, 0, 0])]; tensor attention_8_key_cache_end_0 = const()[name = string("attention_8_key_cache_end_0"), val = tensor([9, 2, 512, 64])]; tensor attention_8_key_cache_squeeze_mask_0 = const()[name = string("attention_8_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_8_key_cache = slice_by_index(begin = attention_8_key_cache_begin_0, end = attention_8_key_cache_end_0, squeeze_mask = attention_8_key_cache_squeeze_mask_0, x = coreml_update_state_16)[name = string("attention_8_key_cache")]; int32 attention_8_key_cache_head_axis_0 = const()[name = string("attention_8_key_cache_head_axis_0"), val = int32(1)]; int32 attention_8_key_cache_head_num_splits_0 = const()[name = string("attention_8_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_8_key_cache_head_0, tensor attention_8_key_cache_head_1 = split(axis = attention_8_key_cache_head_axis_0, num_splits = attention_8_key_cache_head_num_splits_0, x = attention_8_key_cache)[name = string("attention_8_key_cache_head")]; tensor attention_8_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_8_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_8_updated_value_cache_0 = slice_update(begin = attention_8_update_begin_0, end = attention_8_update_end_0, squeeze_mask = attention_8_updated_value_cache_0_squeeze_mask_0, update = attention_8_split_qkv_heads_2, x = coreml_update_state_15)[name = string("attention_8_updated_value_cache_0")]; write_state(data = attention_8_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_65_write_state")]; tensor coreml_update_state_17 = read_state(input = value_cache_state)[name = string("coreml_update_state_65")]; tensor attention_8_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_8_slice_current_layer_value_cache_begin_0"), val = tensor([8, 0, 0, 0])]; tensor attention_8_slice_current_layer_value_cache_end_0 = const()[name = string("attention_8_slice_current_layer_value_cache_end_0"), val = tensor([9, 2, 512, 64])]; tensor attention_8_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_8_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_8_slice_current_layer_value_cache = slice_by_index(begin = attention_8_slice_current_layer_value_cache_begin_0, end = attention_8_slice_current_layer_value_cache_end_0, squeeze_mask = attention_8_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_17)[name = string("attention_8_slice_current_layer_value_cache")]; int32 attention_8_slice_value_cache_heads_axis_0 = const()[name = string("attention_8_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_8_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_8_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_8_slice_value_cache_heads_0, tensor attention_8_slice_value_cache_heads_1 = split(axis = attention_8_slice_value_cache_heads_axis_0, num_splits = 
attention_8_slice_value_cache_heads_num_splits_0, x = attention_8_slice_current_layer_value_cache)[name = string("attention_8_slice_value_cache_heads")]; bool attention_8_scores_0_transpose_y_0 = const()[name = string("attention_8_scores_0_transpose_y_0"), val = bool(true)]; bool attention_8_scores_0_transpose_x_0 = const()[name = string("attention_8_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_8_scores_0 = matmul(transpose_x = attention_8_scores_0_transpose_x_0, transpose_y = attention_8_scores_0_transpose_y_0, x = attention_8_key_cache_head_0, y = attention_8_q_splits_0)[name = string("attention_8_scores_0")]; fp16 attention_8_scaled_scores_0_y_0 = const()[name = string("attention_8_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_8_scaled_scores_0 = mul(x = attention_8_scores_0, y = attention_8_scaled_scores_0_y_0)[name = string("attention_8_scaled_scores_0")]; tensor attention_8_masked_scaled_scores_0 = add(x = attention_8_scaled_scores_0, y = transpose_0)[name = string("attention_8_masked_scaled_scores_0")]; int32 softmax_16_axis_0 = const()[name = string("softmax_16_axis_0"), val = int32(-2)]; tensor softmax_16 = softmax(axis = softmax_16_axis_0, x = attention_8_masked_scaled_scores_0)[name = string("softmax_16")]; bool attention_8_attention_0_transpose_x_0 = const()[name = string("attention_8_attention_0_transpose_x_0"), val = bool(true)]; bool attention_8_attention_0_transpose_y_0 = const()[name = string("attention_8_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_8_attention_0 = matmul(transpose_x = attention_8_attention_0_transpose_x_0, transpose_y = attention_8_attention_0_transpose_y_0, x = softmax_16, y = attention_8_slice_value_cache_heads_0)[name = string("attention_8_attention_0")]; bool attention_8_scores_1_transpose_y_0 = const()[name = string("attention_8_scores_1_transpose_y_0"), val = bool(true)]; bool attention_8_scores_1_transpose_x_0 = const()[name = string("attention_8_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_8_scores_1 = matmul(transpose_x = attention_8_scores_1_transpose_x_0, transpose_y = attention_8_scores_1_transpose_y_0, x = attention_8_key_cache_head_1, y = attention_8_q_splits_1)[name = string("attention_8_scores_1")]; fp16 attention_8_scaled_scores_1_y_0 = const()[name = string("attention_8_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_8_scaled_scores_1 = mul(x = attention_8_scores_1, y = attention_8_scaled_scores_1_y_0)[name = string("attention_8_scaled_scores_1")]; tensor attention_8_masked_scaled_scores_1 = add(x = attention_8_scaled_scores_1, y = transpose_0)[name = string("attention_8_masked_scaled_scores_1")]; int32 softmax_17_axis_0 = const()[name = string("softmax_17_axis_0"), val = int32(-2)]; tensor softmax_17 = softmax(axis = softmax_17_axis_0, x = attention_8_masked_scaled_scores_1)[name = string("softmax_17")]; bool attention_8_attention_1_transpose_x_0 = const()[name = string("attention_8_attention_1_transpose_x_0"), val = bool(true)]; bool attention_8_attention_1_transpose_y_0 = const()[name = string("attention_8_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_8_attention_1 = matmul(transpose_x = attention_8_attention_1_transpose_x_0, transpose_y = attention_8_attention_1_transpose_y_0, x = softmax_17, y = attention_8_slice_value_cache_heads_1)[name = string("attention_8_attention_1")]; int32 attention_8_concat_attention_all_heads_axis_0 = const()[name = string("attention_8_concat_attention_all_heads_axis_0"), val = int32(1)]; bool 
attention_8_concat_attention_all_heads_interleave_0 = const()[name = string("attention_8_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_8_concat_attention_all_heads = concat(axis = attention_8_concat_attention_all_heads_axis_0, interleave = attention_8_concat_attention_all_heads_interleave_0, values = (attention_8_attention_0, attention_8_attention_1))[name = string("attention_8_concat_attention_all_heads")]; tensor attention_8_channels_first_retransposed_perm_0 = const()[name = string("attention_8_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_8_reshaped_shape_0 = const()[name = string("attention_8_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_8_channels_first_retransposed = transpose(perm = attention_8_channels_first_retransposed_perm_0, x = attention_8_concat_attention_all_heads)[name = string("transpose_31")]; tensor attention_8_reshaped = reshape(shape = attention_8_reshaped_shape_0, x = attention_8_channels_first_retransposed)[name = string("attention_8_reshaped")]; tensor attention_8_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384907776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385509952))))[name = string("attention_8_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_36 = constexpr_blockwise_shift_scale(data = attention_8_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385538688))))[name = string("constexpr_blockwise_shift_scale_36")]; tensor attention_8_outproj_strides_0 = const()[name = string("attention_8_outproj_strides_0"), val = tensor([1])]; string attention_8_outproj_pad_type_0 = const()[name = string("attention_8_outproj_pad_type_0"), val = string("valid")]; tensor attention_8_outproj_pad_0 = const()[name = string("attention_8_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_8_outproj_dilations_0 = const()[name = string("attention_8_outproj_dilations_0"), val = tensor([1])]; int32 attention_8_outproj_groups_0 = const()[name = string("attention_8_outproj_groups_0"), val = int32(1)]; tensor attention_8_outproj = conv(dilations = attention_8_outproj_dilations_0, groups = attention_8_outproj_groups_0, pad = attention_8_outproj_pad_0, pad_type = attention_8_outproj_pad_type_0, strides = attention_8_outproj_strides_0, weight = constexpr_blockwise_shift_scale_36, x = attention_8_reshaped)[name = string("attention_8_outproj")]; tensor block_8_residual_1 = add(x = block_7_residual_2, y = attention_8_outproj)[name = string("block_8_residual_1")]; tensor block_8_ffn_rmsnorm_abs = abs(x = block_8_residual_1)[name = string("block_8_ffn_rmsnorm_abs")]; tensor block_8_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_8_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_8_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_8_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_8_ffn_rmsnorm_maxval = reduce_max(axes = block_8_ffn_rmsnorm_maxval_axes_0, keep_dims = block_8_ffn_rmsnorm_maxval_keep_dims_0, x = block_8_ffn_rmsnorm_abs)[name = string("block_8_ffn_rmsnorm_maxval")]; fp16 block_8_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_8_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_8_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = 
string("block_8_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_8_ffn_rmsnorm_maxval_clipped = clip(alpha = block_8_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_8_ffn_rmsnorm_maxval_clipped_beta_0, x = block_8_ffn_rmsnorm_maxval)[name = string("block_8_ffn_rmsnorm_maxval_clipped")]; tensor block_8_ffn_rmsnorm_scaled = real_div(x = block_8_residual_1, y = block_8_ffn_rmsnorm_maxval_clipped)[name = string("block_8_ffn_rmsnorm_scaled")]; tensor block_8_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_8_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_8_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_8_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_8_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_8_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_8_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_8_ffn_rmsnorm_scaled)[name = string("block_8_ffn_rmsnorm_squared_sum")]; fp16 block_8_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_8_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_8_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_8_ffn_rmsnorm_rsqrt_epsilon_0, x = block_8_ffn_rmsnorm_squared_sum)[name = string("block_8_ffn_rmsnorm_rsqrt")]; fp16 block_8_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_8_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_8_ffn_rmsnorm_dim_scaled = mul(x = block_8_ffn_rmsnorm_scaled, y = block_8_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_8_ffn_rmsnorm_dim_scaled")]; tensor block_8_ffn_rmsnorm_normalized = mul(x = block_8_ffn_rmsnorm_dim_scaled, y = block_8_ffn_rmsnorm_rsqrt)[name = string("block_8_ffn_rmsnorm_normalized")]; tensor block_8_ffn_rmsnorm_y_0 = const()[name = string("block_8_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385540544)))]; tensor block_8_ffn_rmsnorm = mul(x = block_8_ffn_rmsnorm_normalized, y = block_8_ffn_rmsnorm_y_0)[name = string("block_8_ffn_rmsnorm")]; tensor block_8_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385542400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388811072))))[name = string("block_8_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_37 = constexpr_blockwise_shift_scale(data = block_8_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388966784))))[name = string("constexpr_blockwise_shift_scale_37")]; tensor block_8_ffn_inproj_strides_0 = const()[name = string("block_8_ffn_inproj_strides_0"), val = tensor([1])]; string block_8_ffn_inproj_pad_type_0 = const()[name = string("block_8_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_8_ffn_inproj_pad_0 = const()[name = string("block_8_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_8_ffn_inproj_dilations_0 = const()[name = string("block_8_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_8_ffn_inproj_groups_0 = const()[name = string("block_8_ffn_inproj_groups_0"), val = int32(1)]; tensor block_8_ffn_inproj = conv(dilations = block_8_ffn_inproj_dilations_0, groups = block_8_ffn_inproj_groups_0, pad = block_8_ffn_inproj_pad_0, pad_type = block_8_ffn_inproj_pad_type_0, strides = block_8_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_37, x = block_8_ffn_rmsnorm)[name = 
string("block_8_ffn_inproj")]; tensor block_8_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388976576))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392245248))))[name = string("block_8_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_38 = constexpr_blockwise_shift_scale(data = block_8_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392400960))))[name = string("constexpr_blockwise_shift_scale_38")]; tensor block_8_ffn_g_strides_0 = const()[name = string("block_8_ffn_g_strides_0"), val = tensor([1])]; string block_8_ffn_g_pad_type_0 = const()[name = string("block_8_ffn_g_pad_type_0"), val = string("valid")]; tensor block_8_ffn_g_pad_0 = const()[name = string("block_8_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_8_ffn_g_dilations_0 = const()[name = string("block_8_ffn_g_dilations_0"), val = tensor([1])]; int32 block_8_ffn_g_groups_0 = const()[name = string("block_8_ffn_g_groups_0"), val = int32(1)]; tensor block_8_ffn_g = conv(dilations = block_8_ffn_g_dilations_0, groups = block_8_ffn_g_groups_0, pad = block_8_ffn_g_pad_0, pad_type = block_8_ffn_g_pad_type_0, strides = block_8_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_38, x = block_8_ffn_rmsnorm)[name = string("block_8_ffn_g")]; tensor block_8_ffn_g_activation = silu(x = block_8_ffn_g)[name = string("block_8_ffn_g_activation")]; tensor block_8_ffn_x_gated = mul(x = block_8_ffn_inproj, y = block_8_ffn_g_activation)[name = string("block_8_ffn_x_gated")]; tensor block_8_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392410752))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395679424))))[name = string("block_8_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_39 = constexpr_blockwise_shift_scale(data = block_8_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395708160))))[name = string("constexpr_blockwise_shift_scale_39")]; tensor block_8_ffn_outproj_strides_0 = const()[name = string("block_8_ffn_outproj_strides_0"), val = tensor([1])]; string block_8_ffn_outproj_pad_type_0 = const()[name = string("block_8_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_8_ffn_outproj_pad_0 = const()[name = string("block_8_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_8_ffn_outproj_dilations_0 = const()[name = string("block_8_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_8_ffn_outproj_groups_0 = const()[name = string("block_8_ffn_outproj_groups_0"), val = int32(1)]; tensor block_8_ffn_outproj = conv(dilations = block_8_ffn_outproj_dilations_0, groups = block_8_ffn_outproj_groups_0, pad = block_8_ffn_outproj_pad_0, pad_type = block_8_ffn_outproj_pad_type_0, strides = block_8_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_39, x = block_8_ffn_x_gated)[name = string("block_8_ffn_outproj")]; tensor block_8_residual_2 = add(x = block_8_ffn_outproj, y = block_8_residual_1)[name = string("block_8_residual_2")]; tensor block_9_attention_rmsnorm_abs = abs(x = block_8_residual_2)[name = string("block_9_attention_rmsnorm_abs")]; tensor block_9_attention_rmsnorm_maxval_axes_0 = const()[name = 
string("block_9_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_9_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_9_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_9_attention_rmsnorm_maxval = reduce_max(axes = block_9_attention_rmsnorm_maxval_axes_0, keep_dims = block_9_attention_rmsnorm_maxval_keep_dims_0, x = block_9_attention_rmsnorm_abs)[name = string("block_9_attention_rmsnorm_maxval")]; fp16 block_9_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_9_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_9_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_9_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_9_attention_rmsnorm_maxval_clipped = clip(alpha = block_9_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_9_attention_rmsnorm_maxval_clipped_beta_0, x = block_9_attention_rmsnorm_maxval)[name = string("block_9_attention_rmsnorm_maxval_clipped")]; tensor block_9_attention_rmsnorm_scaled = real_div(x = block_8_residual_2, y = block_9_attention_rmsnorm_maxval_clipped)[name = string("block_9_attention_rmsnorm_scaled")]; tensor block_9_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_9_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_9_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_9_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_9_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_9_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_9_attention_rmsnorm_squared_sum_keep_dims_0, x = block_9_attention_rmsnorm_scaled)[name = string("block_9_attention_rmsnorm_squared_sum")]; fp16 block_9_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_9_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_9_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_9_attention_rmsnorm_rsqrt_epsilon_0, x = block_9_attention_rmsnorm_squared_sum)[name = string("block_9_attention_rmsnorm_rsqrt")]; fp16 block_9_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_9_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_9_attention_rmsnorm_dim_scaled = mul(x = block_9_attention_rmsnorm_scaled, y = block_9_attention_rmsnorm_dim_scaled_y_0)[name = string("block_9_attention_rmsnorm_dim_scaled")]; tensor block_9_attention_rmsnorm_normalized = mul(x = block_9_attention_rmsnorm_dim_scaled, y = block_9_attention_rmsnorm_rsqrt)[name = string("block_9_attention_rmsnorm_normalized")]; tensor block_9_attention_rmsnorm_y_0 = const()[name = string("block_9_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395710016)))]; tensor block_9_attention_rmsnorm = mul(x = block_9_attention_rmsnorm_normalized, y = block_9_attention_rmsnorm_y_0)[name = string("block_9_attention_rmsnorm")]; tensor attention_9_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(395711872))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396486080))))[name = string("attention_9_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_40 = constexpr_blockwise_shift_scale(data = attention_9_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396523008))))[name = 
string("constexpr_blockwise_shift_scale_40")]; tensor attention_9_qkvproj_bias_0 = const()[name = string("attention_9_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396525376)))]; tensor attention_9_qkvproj_strides_0 = const()[name = string("attention_9_qkvproj_strides_0"), val = tensor([1])]; string attention_9_qkvproj_pad_type_0 = const()[name = string("attention_9_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_9_qkvproj_pad_0 = const()[name = string("attention_9_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_9_qkvproj_dilations_0 = const()[name = string("attention_9_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_9_qkvproj_groups_0 = const()[name = string("attention_9_qkvproj_groups_0"), val = int32(1)]; tensor attention_9_qkvproj = conv(bias = attention_9_qkvproj_bias_0, dilations = attention_9_qkvproj_dilations_0, groups = attention_9_qkvproj_groups_0, pad = attention_9_qkvproj_pad_0, pad_type = attention_9_qkvproj_pad_type_0, strides = attention_9_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_40, x = block_9_attention_rmsnorm)[name = string("attention_9_qkvproj")]; tensor attention_9_head_reshape_shape_0 = const()[name = string("attention_9_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_9_head_reshape = reshape(shape = attention_9_head_reshape_shape_0, x = attention_9_qkvproj)[name = string("attention_9_head_reshape")]; tensor attention_9_head_transpose_perm_0 = const()[name = string("attention_9_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_9_split_qkv_heads_axis_0 = const()[name = string("attention_9_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_9_split_qkv_heads_split_sizes_0 = const()[name = string("attention_9_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_9_head_transpose = transpose(perm = attention_9_head_transpose_perm_0, x = attention_9_head_reshape)[name = string("transpose_30")]; tensor attention_9_split_qkv_heads_0, tensor attention_9_split_qkv_heads_1, tensor attention_9_split_qkv_heads_2 = split(axis = attention_9_split_qkv_heads_axis_0, split_sizes = attention_9_split_qkv_heads_split_sizes_0, x = attention_9_head_transpose)[name = string("attention_9_split_qkv_heads")]; tensor attention_9_q_rope_lhs_mult = mul(x = attention_9_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_9_q_rope_lhs_mult")]; int32 attention_9_q_rotate_half_split_num_splits_0 = const()[name = string("attention_9_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_9_q_rotate_half_split_axis_0 = const()[name = string("attention_9_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_9_q_rotate_half_split_0, tensor attention_9_q_rotate_half_split_1 = split(axis = attention_9_q_rotate_half_split_axis_0, num_splits = attention_9_q_rotate_half_split_num_splits_0, x = attention_9_split_qkv_heads_0)[name = string("attention_9_q_rotate_half_split")]; fp16 attention_9_q_rotate_half_neg_y_0 = const()[name = string("attention_9_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_9_q_rotate_half_neg = mul(x = attention_9_q_rotate_half_split_1, y = attention_9_q_rotate_half_neg_y_0)[name = string("attention_9_q_rotate_half_neg")]; int32 attention_9_q_rotate_half_concat_axis_0 = const()[name = string("attention_9_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_9_q_rotate_half_concat_interleave_0 = const()[name = 
string("attention_9_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_9_q_rotate_half_concat = concat(axis = attention_9_q_rotate_half_concat_axis_0, interleave = attention_9_q_rotate_half_concat_interleave_0, values = (attention_9_q_rotate_half_neg, attention_9_q_rotate_half_split_0))[name = string("attention_9_q_rotate_half_concat")]; tensor attention_9_q_rope_rhs_mult = mul(x = attention_9_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_9_q_rope_rhs_mult")]; tensor attention_9_q_rope = add(x = attention_9_q_rope_lhs_mult, y = attention_9_q_rope_rhs_mult)[name = string("attention_9_q_rope")]; tensor attention_9_k_rope_lhs_mult = mul(x = attention_9_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_9_k_rope_lhs_mult")]; int32 attention_9_k_rotate_half_split_num_splits_0 = const()[name = string("attention_9_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_9_k_rotate_half_split_axis_0 = const()[name = string("attention_9_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_9_k_rotate_half_split_0, tensor attention_9_k_rotate_half_split_1 = split(axis = attention_9_k_rotate_half_split_axis_0, num_splits = attention_9_k_rotate_half_split_num_splits_0, x = attention_9_split_qkv_heads_1)[name = string("attention_9_k_rotate_half_split")]; fp16 attention_9_k_rotate_half_neg_y_0 = const()[name = string("attention_9_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_9_k_rotate_half_neg = mul(x = attention_9_k_rotate_half_split_1, y = attention_9_k_rotate_half_neg_y_0)[name = string("attention_9_k_rotate_half_neg")]; int32 attention_9_k_rotate_half_concat_axis_0 = const()[name = string("attention_9_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_9_k_rotate_half_concat_interleave_0 = const()[name = string("attention_9_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_9_k_rotate_half_concat = concat(axis = attention_9_k_rotate_half_concat_axis_0, interleave = attention_9_k_rotate_half_concat_interleave_0, values = (attention_9_k_rotate_half_neg, attention_9_k_rotate_half_split_0))[name = string("attention_9_k_rotate_half_concat")]; tensor attention_9_k_rope_rhs_mult = mul(x = attention_9_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_9_k_rope_rhs_mult")]; tensor attention_9_k_rope = add(x = attention_9_k_rope_lhs_mult, y = attention_9_k_rope_rhs_mult)[name = string("attention_9_k_rope")]; int32 attention_9_q_splits_axis_0 = const()[name = string("attention_9_q_splits_axis_0"), val = int32(1)]; int32 attention_9_q_splits_num_splits_0 = const()[name = string("attention_9_q_splits_num_splits_0"), val = int32(2)]; tensor attention_9_q_splits_0, tensor attention_9_q_splits_1 = split(axis = attention_9_q_splits_axis_0, num_splits = attention_9_q_splits_num_splits_0, x = attention_9_q_rope)[name = string("attention_9_q_splits")]; tensor attention_9_update_begin_0_values0_0 = const()[name = string("attention_9_update_begin_0_values0_0"), val = tensor([9])]; tensor attention_9_update_begin_0_values1_0 = const()[name = string("attention_9_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_9_update_begin_0_values3_0 = const()[name = string("attention_9_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_9_update_begin_0_axis_0 = const()[name = string("attention_9_update_begin_0_axis_0"), val = int32(0)]; bool attention_9_update_begin_0_interleave_0 = const()[name = string("attention_9_update_begin_0_interleave_0"), val = 
bool(false)]; tensor attention_9_update_begin_0 = concat(axis = attention_9_update_begin_0_axis_0, interleave = attention_9_update_begin_0_interleave_0, values = (attention_9_update_begin_0_values0_0, attention_9_update_begin_0_values1_0, query_pos1, attention_9_update_begin_0_values3_0))[name = string("attention_9_update_begin_0")]; tensor attention_9_update_end_0_values0_0 = const()[name = string("attention_9_update_end_0_values0_0"), val = tensor([10])]; tensor attention_9_update_end_0_values1_0 = const()[name = string("attention_9_update_end_0_values1_0"), val = tensor([2])]; tensor attention_9_update_end_0_values3_0 = const()[name = string("attention_9_update_end_0_values3_0"), val = tensor([64])]; int32 attention_9_update_end_0_axis_0 = const()[name = string("attention_9_update_end_0_axis_0"), val = int32(0)]; bool attention_9_update_end_0_interleave_0 = const()[name = string("attention_9_update_end_0_interleave_0"), val = bool(false)]; tensor attention_9_update_end_0 = concat(axis = attention_9_update_end_0_axis_0, interleave = attention_9_update_end_0_interleave_0, values = (attention_9_update_end_0_values0_0, attention_9_update_end_0_values1_0, end_pos_0, attention_9_update_end_0_values3_0))[name = string("attention_9_update_end_0")]; tensor attention_9_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_9_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_9_updated_key_cache_0 = slice_update(begin = attention_9_update_begin_0, end = attention_9_update_end_0, squeeze_mask = attention_9_updated_key_cache_0_squeeze_mask_0, update = attention_9_k_rope, x = coreml_update_state_16)[name = string("attention_9_updated_key_cache_0")]; write_state(data = attention_9_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_66_write_state")]; tensor coreml_update_state_18 = read_state(input = key_cache_state)[name = string("coreml_update_state_66")]; tensor attention_9_key_cache_begin_0 = const()[name = string("attention_9_key_cache_begin_0"), val = tensor([9, 0, 0, 0])]; tensor attention_9_key_cache_end_0 = const()[name = string("attention_9_key_cache_end_0"), val = tensor([10, 2, 512, 64])]; tensor attention_9_key_cache_squeeze_mask_0 = const()[name = string("attention_9_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_9_key_cache = slice_by_index(begin = attention_9_key_cache_begin_0, end = attention_9_key_cache_end_0, squeeze_mask = attention_9_key_cache_squeeze_mask_0, x = coreml_update_state_18)[name = string("attention_9_key_cache")]; int32 attention_9_key_cache_head_axis_0 = const()[name = string("attention_9_key_cache_head_axis_0"), val = int32(1)]; int32 attention_9_key_cache_head_num_splits_0 = const()[name = string("attention_9_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_9_key_cache_head_0, tensor attention_9_key_cache_head_1 = split(axis = attention_9_key_cache_head_axis_0, num_splits = attention_9_key_cache_head_num_splits_0, x = attention_9_key_cache)[name = string("attention_9_key_cache_head")]; tensor attention_9_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_9_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_9_updated_value_cache_0 = slice_update(begin = attention_9_update_begin_0, end = attention_9_update_end_0, squeeze_mask = attention_9_updated_value_cache_0_squeeze_mask_0, update = attention_9_split_qkv_heads_2, x = 
coreml_update_state_17)[name = string("attention_9_updated_value_cache_0")]; write_state(data = attention_9_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_67_write_state")]; tensor coreml_update_state_19 = read_state(input = value_cache_state)[name = string("coreml_update_state_67")]; tensor attention_9_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_9_slice_current_layer_value_cache_begin_0"), val = tensor([9, 0, 0, 0])]; tensor attention_9_slice_current_layer_value_cache_end_0 = const()[name = string("attention_9_slice_current_layer_value_cache_end_0"), val = tensor([10, 2, 512, 64])]; tensor attention_9_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_9_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_9_slice_current_layer_value_cache = slice_by_index(begin = attention_9_slice_current_layer_value_cache_begin_0, end = attention_9_slice_current_layer_value_cache_end_0, squeeze_mask = attention_9_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_19)[name = string("attention_9_slice_current_layer_value_cache")]; int32 attention_9_slice_value_cache_heads_axis_0 = const()[name = string("attention_9_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_9_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_9_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_9_slice_value_cache_heads_0, tensor attention_9_slice_value_cache_heads_1 = split(axis = attention_9_slice_value_cache_heads_axis_0, num_splits = attention_9_slice_value_cache_heads_num_splits_0, x = attention_9_slice_current_layer_value_cache)[name = string("attention_9_slice_value_cache_heads")]; bool attention_9_scores_0_transpose_y_0 = const()[name = string("attention_9_scores_0_transpose_y_0"), val = bool(true)]; bool attention_9_scores_0_transpose_x_0 = const()[name = string("attention_9_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_9_scores_0 = matmul(transpose_x = attention_9_scores_0_transpose_x_0, transpose_y = attention_9_scores_0_transpose_y_0, x = attention_9_key_cache_head_0, y = attention_9_q_splits_0)[name = string("attention_9_scores_0")]; fp16 attention_9_scaled_scores_0_y_0 = const()[name = string("attention_9_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_9_scaled_scores_0 = mul(x = attention_9_scores_0, y = attention_9_scaled_scores_0_y_0)[name = string("attention_9_scaled_scores_0")]; tensor attention_9_masked_scaled_scores_0 = add(x = attention_9_scaled_scores_0, y = transpose_0)[name = string("attention_9_masked_scaled_scores_0")]; int32 softmax_18_axis_0 = const()[name = string("softmax_18_axis_0"), val = int32(-2)]; tensor softmax_18 = softmax(axis = softmax_18_axis_0, x = attention_9_masked_scaled_scores_0)[name = string("softmax_18")]; bool attention_9_attention_0_transpose_x_0 = const()[name = string("attention_9_attention_0_transpose_x_0"), val = bool(true)]; bool attention_9_attention_0_transpose_y_0 = const()[name = string("attention_9_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_9_attention_0 = matmul(transpose_x = attention_9_attention_0_transpose_x_0, transpose_y = attention_9_attention_0_transpose_y_0, x = softmax_18, y = attention_9_slice_value_cache_heads_0)[name = string("attention_9_attention_0")]; bool attention_9_scores_1_transpose_y_0 = const()[name = string("attention_9_scores_1_transpose_y_0"), val = 
bool(true)]; bool attention_9_scores_1_transpose_x_0 = const()[name = string("attention_9_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_9_scores_1 = matmul(transpose_x = attention_9_scores_1_transpose_x_0, transpose_y = attention_9_scores_1_transpose_y_0, x = attention_9_key_cache_head_1, y = attention_9_q_splits_1)[name = string("attention_9_scores_1")]; fp16 attention_9_scaled_scores_1_y_0 = const()[name = string("attention_9_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_9_scaled_scores_1 = mul(x = attention_9_scores_1, y = attention_9_scaled_scores_1_y_0)[name = string("attention_9_scaled_scores_1")]; tensor attention_9_masked_scaled_scores_1 = add(x = attention_9_scaled_scores_1, y = transpose_0)[name = string("attention_9_masked_scaled_scores_1")]; int32 softmax_19_axis_0 = const()[name = string("softmax_19_axis_0"), val = int32(-2)]; tensor softmax_19 = softmax(axis = softmax_19_axis_0, x = attention_9_masked_scaled_scores_1)[name = string("softmax_19")]; bool attention_9_attention_1_transpose_x_0 = const()[name = string("attention_9_attention_1_transpose_x_0"), val = bool(true)]; bool attention_9_attention_1_transpose_y_0 = const()[name = string("attention_9_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_9_attention_1 = matmul(transpose_x = attention_9_attention_1_transpose_x_0, transpose_y = attention_9_attention_1_transpose_y_0, x = softmax_19, y = attention_9_slice_value_cache_heads_1)[name = string("attention_9_attention_1")]; int32 attention_9_concat_attention_all_heads_axis_0 = const()[name = string("attention_9_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_9_concat_attention_all_heads_interleave_0 = const()[name = string("attention_9_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_9_concat_attention_all_heads = concat(axis = attention_9_concat_attention_all_heads_axis_0, interleave = attention_9_concat_attention_all_heads_interleave_0, values = (attention_9_attention_0, attention_9_attention_1))[name = string("attention_9_concat_attention_all_heads")]; tensor attention_9_channels_first_retransposed_perm_0 = const()[name = string("attention_9_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_9_reshaped_shape_0 = const()[name = string("attention_9_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_9_channels_first_retransposed = transpose(perm = attention_9_channels_first_retransposed_perm_0, x = attention_9_concat_attention_all_heads)[name = string("transpose_29")]; tensor attention_9_reshaped = reshape(shape = attention_9_reshaped_shape_0, x = attention_9_channels_first_retransposed)[name = string("attention_9_reshaped")]; tensor attention_9_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396527744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397129920))))[name = string("attention_9_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_41 = constexpr_blockwise_shift_scale(data = attention_9_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397158656))))[name = string("constexpr_blockwise_shift_scale_41")]; tensor attention_9_outproj_strides_0 = const()[name = string("attention_9_outproj_strides_0"), val = tensor([1])]; string attention_9_outproj_pad_type_0 = const()[name = 
string("attention_9_outproj_pad_type_0"), val = string("valid")]; tensor attention_9_outproj_pad_0 = const()[name = string("attention_9_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_9_outproj_dilations_0 = const()[name = string("attention_9_outproj_dilations_0"), val = tensor([1])]; int32 attention_9_outproj_groups_0 = const()[name = string("attention_9_outproj_groups_0"), val = int32(1)]; tensor attention_9_outproj = conv(dilations = attention_9_outproj_dilations_0, groups = attention_9_outproj_groups_0, pad = attention_9_outproj_pad_0, pad_type = attention_9_outproj_pad_type_0, strides = attention_9_outproj_strides_0, weight = constexpr_blockwise_shift_scale_41, x = attention_9_reshaped)[name = string("attention_9_outproj")]; tensor block_9_residual_1 = add(x = block_8_residual_2, y = attention_9_outproj)[name = string("block_9_residual_1")]; tensor block_9_ffn_rmsnorm_abs = abs(x = block_9_residual_1)[name = string("block_9_ffn_rmsnorm_abs")]; tensor block_9_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_9_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_9_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_9_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_9_ffn_rmsnorm_maxval = reduce_max(axes = block_9_ffn_rmsnorm_maxval_axes_0, keep_dims = block_9_ffn_rmsnorm_maxval_keep_dims_0, x = block_9_ffn_rmsnorm_abs)[name = string("block_9_ffn_rmsnorm_maxval")]; fp16 block_9_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_9_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_9_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_9_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_9_ffn_rmsnorm_maxval_clipped = clip(alpha = block_9_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_9_ffn_rmsnorm_maxval_clipped_beta_0, x = block_9_ffn_rmsnorm_maxval)[name = string("block_9_ffn_rmsnorm_maxval_clipped")]; tensor block_9_ffn_rmsnorm_scaled = real_div(x = block_9_residual_1, y = block_9_ffn_rmsnorm_maxval_clipped)[name = string("block_9_ffn_rmsnorm_scaled")]; tensor block_9_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_9_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_9_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_9_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_9_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_9_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_9_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_9_ffn_rmsnorm_scaled)[name = string("block_9_ffn_rmsnorm_squared_sum")]; fp16 block_9_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_9_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_9_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_9_ffn_rmsnorm_rsqrt_epsilon_0, x = block_9_ffn_rmsnorm_squared_sum)[name = string("block_9_ffn_rmsnorm_rsqrt")]; fp16 block_9_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_9_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_9_ffn_rmsnorm_dim_scaled = mul(x = block_9_ffn_rmsnorm_scaled, y = block_9_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_9_ffn_rmsnorm_dim_scaled")]; tensor block_9_ffn_rmsnorm_normalized = mul(x = block_9_ffn_rmsnorm_dim_scaled, y = block_9_ffn_rmsnorm_rsqrt)[name = string("block_9_ffn_rmsnorm_normalized")]; tensor block_9_ffn_rmsnorm_y_0 = const()[name = string("block_9_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(397160512)))]; tensor block_9_ffn_rmsnorm = mul(x = block_9_ffn_rmsnorm_normalized, y = block_9_ffn_rmsnorm_y_0)[name = string("block_9_ffn_rmsnorm")]; tensor block_9_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397162368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400431040))))[name = string("block_9_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_42 = constexpr_blockwise_shift_scale(data = block_9_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400586752))))[name = string("constexpr_blockwise_shift_scale_42")]; tensor block_9_ffn_inproj_strides_0 = const()[name = string("block_9_ffn_inproj_strides_0"), val = tensor([1])]; string block_9_ffn_inproj_pad_type_0 = const()[name = string("block_9_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_9_ffn_inproj_pad_0 = const()[name = string("block_9_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_9_ffn_inproj_dilations_0 = const()[name = string("block_9_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_9_ffn_inproj_groups_0 = const()[name = string("block_9_ffn_inproj_groups_0"), val = int32(1)]; tensor block_9_ffn_inproj = conv(dilations = block_9_ffn_inproj_dilations_0, groups = block_9_ffn_inproj_groups_0, pad = block_9_ffn_inproj_pad_0, pad_type = block_9_ffn_inproj_pad_type_0, strides = block_9_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_42, x = block_9_ffn_rmsnorm)[name = string("block_9_ffn_inproj")]; tensor block_9_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400596544))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(403865216))))[name = string("block_9_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_43 = constexpr_blockwise_shift_scale(data = block_9_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404020928))))[name = string("constexpr_blockwise_shift_scale_43")]; tensor block_9_ffn_g_strides_0 = const()[name = string("block_9_ffn_g_strides_0"), val = tensor([1])]; string block_9_ffn_g_pad_type_0 = const()[name = string("block_9_ffn_g_pad_type_0"), val = string("valid")]; tensor block_9_ffn_g_pad_0 = const()[name = string("block_9_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_9_ffn_g_dilations_0 = const()[name = string("block_9_ffn_g_dilations_0"), val = tensor([1])]; int32 block_9_ffn_g_groups_0 = const()[name = string("block_9_ffn_g_groups_0"), val = int32(1)]; tensor block_9_ffn_g = conv(dilations = block_9_ffn_g_dilations_0, groups = block_9_ffn_g_groups_0, pad = block_9_ffn_g_pad_0, pad_type = block_9_ffn_g_pad_type_0, strides = block_9_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_43, x = block_9_ffn_rmsnorm)[name = string("block_9_ffn_g")]; tensor block_9_ffn_g_activation = silu(x = block_9_ffn_g)[name = string("block_9_ffn_g_activation")]; tensor block_9_ffn_x_gated = mul(x = block_9_ffn_inproj, y = block_9_ffn_g_activation)[name = string("block_9_ffn_x_gated")]; tensor block_9_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404030720))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(407299392))))[name = string("block_9_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_44 = constexpr_blockwise_shift_scale(data = block_9_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407328128))))[name = string("constexpr_blockwise_shift_scale_44")]; tensor block_9_ffn_outproj_strides_0 = const()[name = string("block_9_ffn_outproj_strides_0"), val = tensor([1])]; string block_9_ffn_outproj_pad_type_0 = const()[name = string("block_9_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_9_ffn_outproj_pad_0 = const()[name = string("block_9_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_9_ffn_outproj_dilations_0 = const()[name = string("block_9_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_9_ffn_outproj_groups_0 = const()[name = string("block_9_ffn_outproj_groups_0"), val = int32(1)]; tensor block_9_ffn_outproj = conv(dilations = block_9_ffn_outproj_dilations_0, groups = block_9_ffn_outproj_groups_0, pad = block_9_ffn_outproj_pad_0, pad_type = block_9_ffn_outproj_pad_type_0, strides = block_9_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_44, x = block_9_ffn_x_gated)[name = string("block_9_ffn_outproj")]; tensor block_9_residual_2 = add(x = block_9_ffn_outproj, y = block_9_residual_1)[name = string("block_9_residual_2")]; tensor block_10_attention_rmsnorm_abs = abs(x = block_9_residual_2)[name = string("block_10_attention_rmsnorm_abs")]; tensor block_10_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_10_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_10_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_10_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_10_attention_rmsnorm_maxval = reduce_max(axes = block_10_attention_rmsnorm_maxval_axes_0, keep_dims = block_10_attention_rmsnorm_maxval_keep_dims_0, x = block_10_attention_rmsnorm_abs)[name = string("block_10_attention_rmsnorm_maxval")]; fp16 block_10_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_10_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_10_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_10_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_10_attention_rmsnorm_maxval_clipped = clip(alpha = block_10_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_10_attention_rmsnorm_maxval_clipped_beta_0, x = block_10_attention_rmsnorm_maxval)[name = string("block_10_attention_rmsnorm_maxval_clipped")]; tensor block_10_attention_rmsnorm_scaled = real_div(x = block_9_residual_2, y = block_10_attention_rmsnorm_maxval_clipped)[name = string("block_10_attention_rmsnorm_scaled")]; tensor block_10_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_10_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_10_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_10_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_10_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_10_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_10_attention_rmsnorm_squared_sum_keep_dims_0, x = block_10_attention_rmsnorm_scaled)[name = string("block_10_attention_rmsnorm_squared_sum")]; fp16 block_10_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = 
string("block_10_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_10_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_10_attention_rmsnorm_rsqrt_epsilon_0, x = block_10_attention_rmsnorm_squared_sum)[name = string("block_10_attention_rmsnorm_rsqrt")]; fp16 block_10_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_10_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_10_attention_rmsnorm_dim_scaled = mul(x = block_10_attention_rmsnorm_scaled, y = block_10_attention_rmsnorm_dim_scaled_y_0)[name = string("block_10_attention_rmsnorm_dim_scaled")]; tensor block_10_attention_rmsnorm_normalized = mul(x = block_10_attention_rmsnorm_dim_scaled, y = block_10_attention_rmsnorm_rsqrt)[name = string("block_10_attention_rmsnorm_normalized")]; tensor block_10_attention_rmsnorm_y_0 = const()[name = string("block_10_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407329984)))]; tensor block_10_attention_rmsnorm = mul(x = block_10_attention_rmsnorm_normalized, y = block_10_attention_rmsnorm_y_0)[name = string("block_10_attention_rmsnorm")]; tensor attention_10_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(407331840))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408106048))))[name = string("attention_10_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_45 = constexpr_blockwise_shift_scale(data = attention_10_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408142976))))[name = string("constexpr_blockwise_shift_scale_45")]; tensor attention_10_qkvproj_bias_0 = const()[name = string("attention_10_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408145344)))]; tensor attention_10_qkvproj_strides_0 = const()[name = string("attention_10_qkvproj_strides_0"), val = tensor([1])]; string attention_10_qkvproj_pad_type_0 = const()[name = string("attention_10_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_10_qkvproj_pad_0 = const()[name = string("attention_10_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_10_qkvproj_dilations_0 = const()[name = string("attention_10_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_10_qkvproj_groups_0 = const()[name = string("attention_10_qkvproj_groups_0"), val = int32(1)]; tensor attention_10_qkvproj = conv(bias = attention_10_qkvproj_bias_0, dilations = attention_10_qkvproj_dilations_0, groups = attention_10_qkvproj_groups_0, pad = attention_10_qkvproj_pad_0, pad_type = attention_10_qkvproj_pad_type_0, strides = attention_10_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_45, x = block_10_attention_rmsnorm)[name = string("attention_10_qkvproj")]; tensor attention_10_head_reshape_shape_0 = const()[name = string("attention_10_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_10_head_reshape = reshape(shape = attention_10_head_reshape_shape_0, x = attention_10_qkvproj)[name = string("attention_10_head_reshape")]; tensor attention_10_head_transpose_perm_0 = const()[name = string("attention_10_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_10_split_qkv_heads_axis_0 = const()[name = string("attention_10_split_qkv_heads_axis_0"), val = int32(1)]; tensor 
attention_10_split_qkv_heads_split_sizes_0 = const()[name = string("attention_10_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_10_head_transpose = transpose(perm = attention_10_head_transpose_perm_0, x = attention_10_head_reshape)[name = string("transpose_28")]; tensor attention_10_split_qkv_heads_0, tensor attention_10_split_qkv_heads_1, tensor attention_10_split_qkv_heads_2 = split(axis = attention_10_split_qkv_heads_axis_0, split_sizes = attention_10_split_qkv_heads_split_sizes_0, x = attention_10_head_transpose)[name = string("attention_10_split_qkv_heads")]; tensor attention_10_q_rope_lhs_mult = mul(x = attention_10_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_10_q_rope_lhs_mult")]; int32 attention_10_q_rotate_half_split_num_splits_0 = const()[name = string("attention_10_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_10_q_rotate_half_split_axis_0 = const()[name = string("attention_10_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_10_q_rotate_half_split_0, tensor attention_10_q_rotate_half_split_1 = split(axis = attention_10_q_rotate_half_split_axis_0, num_splits = attention_10_q_rotate_half_split_num_splits_0, x = attention_10_split_qkv_heads_0)[name = string("attention_10_q_rotate_half_split")]; fp16 attention_10_q_rotate_half_neg_y_0 = const()[name = string("attention_10_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_10_q_rotate_half_neg = mul(x = attention_10_q_rotate_half_split_1, y = attention_10_q_rotate_half_neg_y_0)[name = string("attention_10_q_rotate_half_neg")]; int32 attention_10_q_rotate_half_concat_axis_0 = const()[name = string("attention_10_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_10_q_rotate_half_concat_interleave_0 = const()[name = string("attention_10_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_10_q_rotate_half_concat = concat(axis = attention_10_q_rotate_half_concat_axis_0, interleave = attention_10_q_rotate_half_concat_interleave_0, values = (attention_10_q_rotate_half_neg, attention_10_q_rotate_half_split_0))[name = string("attention_10_q_rotate_half_concat")]; tensor attention_10_q_rope_rhs_mult = mul(x = attention_10_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_10_q_rope_rhs_mult")]; tensor attention_10_q_rope = add(x = attention_10_q_rope_lhs_mult, y = attention_10_q_rope_rhs_mult)[name = string("attention_10_q_rope")]; tensor attention_10_k_rope_lhs_mult = mul(x = attention_10_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_10_k_rope_lhs_mult")]; int32 attention_10_k_rotate_half_split_num_splits_0 = const()[name = string("attention_10_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_10_k_rotate_half_split_axis_0 = const()[name = string("attention_10_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_10_k_rotate_half_split_0, tensor attention_10_k_rotate_half_split_1 = split(axis = attention_10_k_rotate_half_split_axis_0, num_splits = attention_10_k_rotate_half_split_num_splits_0, x = attention_10_split_qkv_heads_1)[name = string("attention_10_k_rotate_half_split")]; fp16 attention_10_k_rotate_half_neg_y_0 = const()[name = string("attention_10_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_10_k_rotate_half_neg = mul(x = attention_10_k_rotate_half_split_1, y = attention_10_k_rotate_half_neg_y_0)[name = string("attention_10_k_rotate_half_neg")]; int32 attention_10_k_rotate_half_concat_axis_0 = const()[name = 
string("attention_10_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_10_k_rotate_half_concat_interleave_0 = const()[name = string("attention_10_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_10_k_rotate_half_concat = concat(axis = attention_10_k_rotate_half_concat_axis_0, interleave = attention_10_k_rotate_half_concat_interleave_0, values = (attention_10_k_rotate_half_neg, attention_10_k_rotate_half_split_0))[name = string("attention_10_k_rotate_half_concat")]; tensor attention_10_k_rope_rhs_mult = mul(x = attention_10_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_10_k_rope_rhs_mult")]; tensor attention_10_k_rope = add(x = attention_10_k_rope_lhs_mult, y = attention_10_k_rope_rhs_mult)[name = string("attention_10_k_rope")]; int32 attention_10_q_splits_axis_0 = const()[name = string("attention_10_q_splits_axis_0"), val = int32(1)]; int32 attention_10_q_splits_num_splits_0 = const()[name = string("attention_10_q_splits_num_splits_0"), val = int32(2)]; tensor attention_10_q_splits_0, tensor attention_10_q_splits_1 = split(axis = attention_10_q_splits_axis_0, num_splits = attention_10_q_splits_num_splits_0, x = attention_10_q_rope)[name = string("attention_10_q_splits")]; tensor attention_10_update_begin_0_values0_0 = const()[name = string("attention_10_update_begin_0_values0_0"), val = tensor([10])]; tensor attention_10_update_begin_0_values1_0 = const()[name = string("attention_10_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_10_update_begin_0_values3_0 = const()[name = string("attention_10_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_10_update_begin_0_axis_0 = const()[name = string("attention_10_update_begin_0_axis_0"), val = int32(0)]; bool attention_10_update_begin_0_interleave_0 = const()[name = string("attention_10_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_10_update_begin_0 = concat(axis = attention_10_update_begin_0_axis_0, interleave = attention_10_update_begin_0_interleave_0, values = (attention_10_update_begin_0_values0_0, attention_10_update_begin_0_values1_0, query_pos1, attention_10_update_begin_0_values3_0))[name = string("attention_10_update_begin_0")]; tensor attention_10_update_end_0_values0_0 = const()[name = string("attention_10_update_end_0_values0_0"), val = tensor([11])]; tensor attention_10_update_end_0_values1_0 = const()[name = string("attention_10_update_end_0_values1_0"), val = tensor([2])]; tensor attention_10_update_end_0_values3_0 = const()[name = string("attention_10_update_end_0_values3_0"), val = tensor([64])]; int32 attention_10_update_end_0_axis_0 = const()[name = string("attention_10_update_end_0_axis_0"), val = int32(0)]; bool attention_10_update_end_0_interleave_0 = const()[name = string("attention_10_update_end_0_interleave_0"), val = bool(false)]; tensor attention_10_update_end_0 = concat(axis = attention_10_update_end_0_axis_0, interleave = attention_10_update_end_0_interleave_0, values = (attention_10_update_end_0_values0_0, attention_10_update_end_0_values1_0, end_pos_0, attention_10_update_end_0_values3_0))[name = string("attention_10_update_end_0")]; tensor attention_10_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_10_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_10_updated_key_cache_0 = slice_update(begin = attention_10_update_begin_0, end = attention_10_update_end_0, squeeze_mask = attention_10_updated_key_cache_0_squeeze_mask_0, update = 
attention_10_k_rope, x = coreml_update_state_18)[name = string("attention_10_updated_key_cache_0")]; write_state(data = attention_10_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_68_write_state")]; tensor coreml_update_state_20 = read_state(input = key_cache_state)[name = string("coreml_update_state_68")]; tensor attention_10_key_cache_begin_0 = const()[name = string("attention_10_key_cache_begin_0"), val = tensor([10, 0, 0, 0])]; tensor attention_10_key_cache_end_0 = const()[name = string("attention_10_key_cache_end_0"), val = tensor([11, 2, 512, 64])]; tensor attention_10_key_cache_squeeze_mask_0 = const()[name = string("attention_10_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_10_key_cache = slice_by_index(begin = attention_10_key_cache_begin_0, end = attention_10_key_cache_end_0, squeeze_mask = attention_10_key_cache_squeeze_mask_0, x = coreml_update_state_20)[name = string("attention_10_key_cache")]; int32 attention_10_key_cache_head_axis_0 = const()[name = string("attention_10_key_cache_head_axis_0"), val = int32(1)]; int32 attention_10_key_cache_head_num_splits_0 = const()[name = string("attention_10_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_10_key_cache_head_0, tensor attention_10_key_cache_head_1 = split(axis = attention_10_key_cache_head_axis_0, num_splits = attention_10_key_cache_head_num_splits_0, x = attention_10_key_cache)[name = string("attention_10_key_cache_head")]; tensor attention_10_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_10_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_10_updated_value_cache_0 = slice_update(begin = attention_10_update_begin_0, end = attention_10_update_end_0, squeeze_mask = attention_10_updated_value_cache_0_squeeze_mask_0, update = attention_10_split_qkv_heads_2, x = coreml_update_state_19)[name = string("attention_10_updated_value_cache_0")]; write_state(data = attention_10_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_69_write_state")]; tensor coreml_update_state_21 = read_state(input = value_cache_state)[name = string("coreml_update_state_69")]; tensor attention_10_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_10_slice_current_layer_value_cache_begin_0"), val = tensor([10, 0, 0, 0])]; tensor attention_10_slice_current_layer_value_cache_end_0 = const()[name = string("attention_10_slice_current_layer_value_cache_end_0"), val = tensor([11, 2, 512, 64])]; tensor attention_10_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_10_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_10_slice_current_layer_value_cache = slice_by_index(begin = attention_10_slice_current_layer_value_cache_begin_0, end = attention_10_slice_current_layer_value_cache_end_0, squeeze_mask = attention_10_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_21)[name = string("attention_10_slice_current_layer_value_cache")]; int32 attention_10_slice_value_cache_heads_axis_0 = const()[name = string("attention_10_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_10_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_10_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_10_slice_value_cache_heads_0, tensor attention_10_slice_value_cache_heads_1 = split(axis = 
attention_10_slice_value_cache_heads_axis_0, num_splits = attention_10_slice_value_cache_heads_num_splits_0, x = attention_10_slice_current_layer_value_cache)[name = string("attention_10_slice_value_cache_heads")]; bool attention_10_scores_0_transpose_y_0 = const()[name = string("attention_10_scores_0_transpose_y_0"), val = bool(true)]; bool attention_10_scores_0_transpose_x_0 = const()[name = string("attention_10_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_10_scores_0 = matmul(transpose_x = attention_10_scores_0_transpose_x_0, transpose_y = attention_10_scores_0_transpose_y_0, x = attention_10_key_cache_head_0, y = attention_10_q_splits_0)[name = string("attention_10_scores_0")]; fp16 attention_10_scaled_scores_0_y_0 = const()[name = string("attention_10_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_10_scaled_scores_0 = mul(x = attention_10_scores_0, y = attention_10_scaled_scores_0_y_0)[name = string("attention_10_scaled_scores_0")]; tensor attention_10_masked_scaled_scores_0 = add(x = attention_10_scaled_scores_0, y = transpose_0)[name = string("attention_10_masked_scaled_scores_0")]; int32 softmax_20_axis_0 = const()[name = string("softmax_20_axis_0"), val = int32(-2)]; tensor softmax_20 = softmax(axis = softmax_20_axis_0, x = attention_10_masked_scaled_scores_0)[name = string("softmax_20")]; bool attention_10_attention_0_transpose_x_0 = const()[name = string("attention_10_attention_0_transpose_x_0"), val = bool(true)]; bool attention_10_attention_0_transpose_y_0 = const()[name = string("attention_10_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_10_attention_0 = matmul(transpose_x = attention_10_attention_0_transpose_x_0, transpose_y = attention_10_attention_0_transpose_y_0, x = softmax_20, y = attention_10_slice_value_cache_heads_0)[name = string("attention_10_attention_0")]; bool attention_10_scores_1_transpose_y_0 = const()[name = string("attention_10_scores_1_transpose_y_0"), val = bool(true)]; bool attention_10_scores_1_transpose_x_0 = const()[name = string("attention_10_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_10_scores_1 = matmul(transpose_x = attention_10_scores_1_transpose_x_0, transpose_y = attention_10_scores_1_transpose_y_0, x = attention_10_key_cache_head_1, y = attention_10_q_splits_1)[name = string("attention_10_scores_1")]; fp16 attention_10_scaled_scores_1_y_0 = const()[name = string("attention_10_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_10_scaled_scores_1 = mul(x = attention_10_scores_1, y = attention_10_scaled_scores_1_y_0)[name = string("attention_10_scaled_scores_1")]; tensor attention_10_masked_scaled_scores_1 = add(x = attention_10_scaled_scores_1, y = transpose_0)[name = string("attention_10_masked_scaled_scores_1")]; int32 softmax_21_axis_0 = const()[name = string("softmax_21_axis_0"), val = int32(-2)]; tensor softmax_21 = softmax(axis = softmax_21_axis_0, x = attention_10_masked_scaled_scores_1)[name = string("softmax_21")]; bool attention_10_attention_1_transpose_x_0 = const()[name = string("attention_10_attention_1_transpose_x_0"), val = bool(true)]; bool attention_10_attention_1_transpose_y_0 = const()[name = string("attention_10_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_10_attention_1 = matmul(transpose_x = attention_10_attention_1_transpose_x_0, transpose_y = attention_10_attention_1_transpose_y_0, x = softmax_21, y = attention_10_slice_value_cache_heads_1)[name = string("attention_10_attention_1")]; int32 
attention_10_concat_attention_all_heads_axis_0 = const()[name = string("attention_10_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_10_concat_attention_all_heads_interleave_0 = const()[name = string("attention_10_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_10_concat_attention_all_heads = concat(axis = attention_10_concat_attention_all_heads_axis_0, interleave = attention_10_concat_attention_all_heads_interleave_0, values = (attention_10_attention_0, attention_10_attention_1))[name = string("attention_10_concat_attention_all_heads")]; tensor attention_10_channels_first_retransposed_perm_0 = const()[name = string("attention_10_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_10_reshaped_shape_0 = const()[name = string("attention_10_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_10_channels_first_retransposed = transpose(perm = attention_10_channels_first_retransposed_perm_0, x = attention_10_concat_attention_all_heads)[name = string("transpose_27")]; tensor attention_10_reshaped = reshape(shape = attention_10_reshaped_shape_0, x = attention_10_channels_first_retransposed)[name = string("attention_10_reshaped")]; tensor attention_10_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408147712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408749888))))[name = string("attention_10_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_46 = constexpr_blockwise_shift_scale(data = attention_10_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408778624))))[name = string("constexpr_blockwise_shift_scale_46")]; tensor attention_10_outproj_strides_0 = const()[name = string("attention_10_outproj_strides_0"), val = tensor([1])]; string attention_10_outproj_pad_type_0 = const()[name = string("attention_10_outproj_pad_type_0"), val = string("valid")]; tensor attention_10_outproj_pad_0 = const()[name = string("attention_10_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_10_outproj_dilations_0 = const()[name = string("attention_10_outproj_dilations_0"), val = tensor([1])]; int32 attention_10_outproj_groups_0 = const()[name = string("attention_10_outproj_groups_0"), val = int32(1)]; tensor attention_10_outproj = conv(dilations = attention_10_outproj_dilations_0, groups = attention_10_outproj_groups_0, pad = attention_10_outproj_pad_0, pad_type = attention_10_outproj_pad_type_0, strides = attention_10_outproj_strides_0, weight = constexpr_blockwise_shift_scale_46, x = attention_10_reshaped)[name = string("attention_10_outproj")]; tensor block_10_residual_1 = add(x = block_9_residual_2, y = attention_10_outproj)[name = string("block_10_residual_1")]; tensor block_10_ffn_rmsnorm_abs = abs(x = block_10_residual_1)[name = string("block_10_ffn_rmsnorm_abs")]; tensor block_10_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_10_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_10_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_10_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_10_ffn_rmsnorm_maxval = reduce_max(axes = block_10_ffn_rmsnorm_maxval_axes_0, keep_dims = block_10_ffn_rmsnorm_maxval_keep_dims_0, x = block_10_ffn_rmsnorm_abs)[name = string("block_10_ffn_rmsnorm_maxval")]; fp16 
block_10_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_10_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_10_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_10_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_10_ffn_rmsnorm_maxval_clipped = clip(alpha = block_10_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_10_ffn_rmsnorm_maxval_clipped_beta_0, x = block_10_ffn_rmsnorm_maxval)[name = string("block_10_ffn_rmsnorm_maxval_clipped")]; tensor block_10_ffn_rmsnorm_scaled = real_div(x = block_10_residual_1, y = block_10_ffn_rmsnorm_maxval_clipped)[name = string("block_10_ffn_rmsnorm_scaled")]; tensor block_10_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_10_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_10_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_10_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_10_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_10_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_10_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_10_ffn_rmsnorm_scaled)[name = string("block_10_ffn_rmsnorm_squared_sum")]; fp16 block_10_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_10_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_10_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_10_ffn_rmsnorm_rsqrt_epsilon_0, x = block_10_ffn_rmsnorm_squared_sum)[name = string("block_10_ffn_rmsnorm_rsqrt")]; fp16 block_10_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_10_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_10_ffn_rmsnorm_dim_scaled = mul(x = block_10_ffn_rmsnorm_scaled, y = block_10_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_10_ffn_rmsnorm_dim_scaled")]; tensor block_10_ffn_rmsnorm_normalized = mul(x = block_10_ffn_rmsnorm_dim_scaled, y = block_10_ffn_rmsnorm_rsqrt)[name = string("block_10_ffn_rmsnorm_normalized")]; tensor block_10_ffn_rmsnorm_y_0 = const()[name = string("block_10_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408780480)))]; tensor block_10_ffn_rmsnorm = mul(x = block_10_ffn_rmsnorm_normalized, y = block_10_ffn_rmsnorm_y_0)[name = string("block_10_ffn_rmsnorm")]; tensor block_10_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408782336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412051008))))[name = string("block_10_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_47 = constexpr_blockwise_shift_scale(data = block_10_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412206720))))[name = string("constexpr_blockwise_shift_scale_47")]; tensor block_10_ffn_inproj_strides_0 = const()[name = string("block_10_ffn_inproj_strides_0"), val = tensor([1])]; string block_10_ffn_inproj_pad_type_0 = const()[name = string("block_10_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_10_ffn_inproj_pad_0 = const()[name = string("block_10_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_10_ffn_inproj_dilations_0 = const()[name = string("block_10_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_10_ffn_inproj_groups_0 = const()[name = string("block_10_ffn_inproj_groups_0"), val = int32(1)]; tensor block_10_ffn_inproj = conv(dilations = 
block_10_ffn_inproj_dilations_0, groups = block_10_ffn_inproj_groups_0, pad = block_10_ffn_inproj_pad_0, pad_type = block_10_ffn_inproj_pad_type_0, strides = block_10_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_47, x = block_10_ffn_rmsnorm)[name = string("block_10_ffn_inproj")]; tensor block_10_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412216512))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415485184))))[name = string("block_10_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_48 = constexpr_blockwise_shift_scale(data = block_10_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415640896))))[name = string("constexpr_blockwise_shift_scale_48")]; tensor block_10_ffn_g_strides_0 = const()[name = string("block_10_ffn_g_strides_0"), val = tensor([1])]; string block_10_ffn_g_pad_type_0 = const()[name = string("block_10_ffn_g_pad_type_0"), val = string("valid")]; tensor block_10_ffn_g_pad_0 = const()[name = string("block_10_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_10_ffn_g_dilations_0 = const()[name = string("block_10_ffn_g_dilations_0"), val = tensor([1])]; int32 block_10_ffn_g_groups_0 = const()[name = string("block_10_ffn_g_groups_0"), val = int32(1)]; tensor block_10_ffn_g = conv(dilations = block_10_ffn_g_dilations_0, groups = block_10_ffn_g_groups_0, pad = block_10_ffn_g_pad_0, pad_type = block_10_ffn_g_pad_type_0, strides = block_10_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_48, x = block_10_ffn_rmsnorm)[name = string("block_10_ffn_g")]; tensor block_10_ffn_g_activation = silu(x = block_10_ffn_g)[name = string("block_10_ffn_g_activation")]; tensor block_10_ffn_x_gated = mul(x = block_10_ffn_inproj, y = block_10_ffn_g_activation)[name = string("block_10_ffn_x_gated")]; tensor block_10_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(415650688))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418919360))))[name = string("block_10_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_49 = constexpr_blockwise_shift_scale(data = block_10_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418948096))))[name = string("constexpr_blockwise_shift_scale_49")]; tensor block_10_ffn_outproj_strides_0 = const()[name = string("block_10_ffn_outproj_strides_0"), val = tensor([1])]; string block_10_ffn_outproj_pad_type_0 = const()[name = string("block_10_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_10_ffn_outproj_pad_0 = const()[name = string("block_10_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_10_ffn_outproj_dilations_0 = const()[name = string("block_10_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_10_ffn_outproj_groups_0 = const()[name = string("block_10_ffn_outproj_groups_0"), val = int32(1)]; tensor block_10_ffn_outproj = conv(dilations = block_10_ffn_outproj_dilations_0, groups = block_10_ffn_outproj_groups_0, pad = block_10_ffn_outproj_pad_0, pad_type = block_10_ffn_outproj_pad_type_0, strides = block_10_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_49, x = block_10_ffn_x_gated)[name = string("block_10_ffn_outproj")]; tensor 
block_10_residual_2 = add(x = block_10_ffn_outproj, y = block_10_residual_1)[name = string("block_10_residual_2")]; tensor block_11_attention_rmsnorm_abs = abs(x = block_10_residual_2)[name = string("block_11_attention_rmsnorm_abs")]; tensor block_11_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_11_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_11_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_11_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_11_attention_rmsnorm_maxval = reduce_max(axes = block_11_attention_rmsnorm_maxval_axes_0, keep_dims = block_11_attention_rmsnorm_maxval_keep_dims_0, x = block_11_attention_rmsnorm_abs)[name = string("block_11_attention_rmsnorm_maxval")]; fp16 block_11_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_11_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_11_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_11_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_11_attention_rmsnorm_maxval_clipped = clip(alpha = block_11_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_11_attention_rmsnorm_maxval_clipped_beta_0, x = block_11_attention_rmsnorm_maxval)[name = string("block_11_attention_rmsnorm_maxval_clipped")]; tensor block_11_attention_rmsnorm_scaled = real_div(x = block_10_residual_2, y = block_11_attention_rmsnorm_maxval_clipped)[name = string("block_11_attention_rmsnorm_scaled")]; tensor block_11_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_11_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_11_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_11_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_11_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_11_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_11_attention_rmsnorm_squared_sum_keep_dims_0, x = block_11_attention_rmsnorm_scaled)[name = string("block_11_attention_rmsnorm_squared_sum")]; fp16 block_11_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_11_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_11_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_11_attention_rmsnorm_rsqrt_epsilon_0, x = block_11_attention_rmsnorm_squared_sum)[name = string("block_11_attention_rmsnorm_rsqrt")]; fp16 block_11_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_11_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_11_attention_rmsnorm_dim_scaled = mul(x = block_11_attention_rmsnorm_scaled, y = block_11_attention_rmsnorm_dim_scaled_y_0)[name = string("block_11_attention_rmsnorm_dim_scaled")]; tensor block_11_attention_rmsnorm_normalized = mul(x = block_11_attention_rmsnorm_dim_scaled, y = block_11_attention_rmsnorm_rsqrt)[name = string("block_11_attention_rmsnorm_normalized")]; tensor block_11_attention_rmsnorm_y_0 = const()[name = string("block_11_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418949952)))]; tensor block_11_attention_rmsnorm = mul(x = block_11_attention_rmsnorm_normalized, y = block_11_attention_rmsnorm_y_0)[name = string("block_11_attention_rmsnorm")]; tensor attention_11_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418951808))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(419726016))))[name = string("attention_11_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_50 = constexpr_blockwise_shift_scale(data = attention_11_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419762944))))[name = string("constexpr_blockwise_shift_scale_50")]; tensor attention_11_qkvproj_bias_0 = const()[name = string("attention_11_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(419765312)))]; tensor attention_11_qkvproj_strides_0 = const()[name = string("attention_11_qkvproj_strides_0"), val = tensor([1])]; string attention_11_qkvproj_pad_type_0 = const()[name = string("attention_11_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_11_qkvproj_pad_0 = const()[name = string("attention_11_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_11_qkvproj_dilations_0 = const()[name = string("attention_11_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_11_qkvproj_groups_0 = const()[name = string("attention_11_qkvproj_groups_0"), val = int32(1)]; tensor attention_11_qkvproj = conv(bias = attention_11_qkvproj_bias_0, dilations = attention_11_qkvproj_dilations_0, groups = attention_11_qkvproj_groups_0, pad = attention_11_qkvproj_pad_0, pad_type = attention_11_qkvproj_pad_type_0, strides = attention_11_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_50, x = block_11_attention_rmsnorm)[name = string("attention_11_qkvproj")]; tensor attention_11_head_reshape_shape_0 = const()[name = string("attention_11_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_11_head_reshape = reshape(shape = attention_11_head_reshape_shape_0, x = attention_11_qkvproj)[name = string("attention_11_head_reshape")]; tensor attention_11_head_transpose_perm_0 = const()[name = string("attention_11_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_11_split_qkv_heads_axis_0 = const()[name = string("attention_11_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_11_split_qkv_heads_split_sizes_0 = const()[name = string("attention_11_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_11_head_transpose = transpose(perm = attention_11_head_transpose_perm_0, x = attention_11_head_reshape)[name = string("transpose_26")]; tensor attention_11_split_qkv_heads_0, tensor attention_11_split_qkv_heads_1, tensor attention_11_split_qkv_heads_2 = split(axis = attention_11_split_qkv_heads_axis_0, split_sizes = attention_11_split_qkv_heads_split_sizes_0, x = attention_11_head_transpose)[name = string("attention_11_split_qkv_heads")]; tensor attention_11_q_rope_lhs_mult = mul(x = attention_11_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_11_q_rope_lhs_mult")]; int32 attention_11_q_rotate_half_split_num_splits_0 = const()[name = string("attention_11_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_11_q_rotate_half_split_axis_0 = const()[name = string("attention_11_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_11_q_rotate_half_split_0, tensor attention_11_q_rotate_half_split_1 = split(axis = attention_11_q_rotate_half_split_axis_0, num_splits = attention_11_q_rotate_half_split_num_splits_0, x = attention_11_split_qkv_heads_0)[name = string("attention_11_q_rotate_half_split")]; fp16 attention_11_q_rotate_half_neg_y_0 = const()[name = 
string("attention_11_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_11_q_rotate_half_neg = mul(x = attention_11_q_rotate_half_split_1, y = attention_11_q_rotate_half_neg_y_0)[name = string("attention_11_q_rotate_half_neg")]; int32 attention_11_q_rotate_half_concat_axis_0 = const()[name = string("attention_11_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_11_q_rotate_half_concat_interleave_0 = const()[name = string("attention_11_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_11_q_rotate_half_concat = concat(axis = attention_11_q_rotate_half_concat_axis_0, interleave = attention_11_q_rotate_half_concat_interleave_0, values = (attention_11_q_rotate_half_neg, attention_11_q_rotate_half_split_0))[name = string("attention_11_q_rotate_half_concat")]; tensor attention_11_q_rope_rhs_mult = mul(x = attention_11_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_11_q_rope_rhs_mult")]; tensor attention_11_q_rope = add(x = attention_11_q_rope_lhs_mult, y = attention_11_q_rope_rhs_mult)[name = string("attention_11_q_rope")]; tensor attention_11_k_rope_lhs_mult = mul(x = attention_11_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_11_k_rope_lhs_mult")]; int32 attention_11_k_rotate_half_split_num_splits_0 = const()[name = string("attention_11_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_11_k_rotate_half_split_axis_0 = const()[name = string("attention_11_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_11_k_rotate_half_split_0, tensor attention_11_k_rotate_half_split_1 = split(axis = attention_11_k_rotate_half_split_axis_0, num_splits = attention_11_k_rotate_half_split_num_splits_0, x = attention_11_split_qkv_heads_1)[name = string("attention_11_k_rotate_half_split")]; fp16 attention_11_k_rotate_half_neg_y_0 = const()[name = string("attention_11_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_11_k_rotate_half_neg = mul(x = attention_11_k_rotate_half_split_1, y = attention_11_k_rotate_half_neg_y_0)[name = string("attention_11_k_rotate_half_neg")]; int32 attention_11_k_rotate_half_concat_axis_0 = const()[name = string("attention_11_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_11_k_rotate_half_concat_interleave_0 = const()[name = string("attention_11_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_11_k_rotate_half_concat = concat(axis = attention_11_k_rotate_half_concat_axis_0, interleave = attention_11_k_rotate_half_concat_interleave_0, values = (attention_11_k_rotate_half_neg, attention_11_k_rotate_half_split_0))[name = string("attention_11_k_rotate_half_concat")]; tensor attention_11_k_rope_rhs_mult = mul(x = attention_11_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_11_k_rope_rhs_mult")]; tensor attention_11_k_rope = add(x = attention_11_k_rope_lhs_mult, y = attention_11_k_rope_rhs_mult)[name = string("attention_11_k_rope")]; int32 attention_11_q_splits_axis_0 = const()[name = string("attention_11_q_splits_axis_0"), val = int32(1)]; int32 attention_11_q_splits_num_splits_0 = const()[name = string("attention_11_q_splits_num_splits_0"), val = int32(2)]; tensor attention_11_q_splits_0, tensor attention_11_q_splits_1 = split(axis = attention_11_q_splits_axis_0, num_splits = attention_11_q_splits_num_splits_0, x = attention_11_q_rope)[name = string("attention_11_q_splits")]; tensor attention_11_update_begin_0_values0_0 = const()[name = string("attention_11_update_begin_0_values0_0"), val = tensor([11])]; 
tensor attention_11_update_begin_0_values1_0 = const()[name = string("attention_11_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_11_update_begin_0_values3_0 = const()[name = string("attention_11_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_11_update_begin_0_axis_0 = const()[name = string("attention_11_update_begin_0_axis_0"), val = int32(0)]; bool attention_11_update_begin_0_interleave_0 = const()[name = string("attention_11_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_11_update_begin_0 = concat(axis = attention_11_update_begin_0_axis_0, interleave = attention_11_update_begin_0_interleave_0, values = (attention_11_update_begin_0_values0_0, attention_11_update_begin_0_values1_0, query_pos1, attention_11_update_begin_0_values3_0))[name = string("attention_11_update_begin_0")]; tensor attention_11_update_end_0_values0_0 = const()[name = string("attention_11_update_end_0_values0_0"), val = tensor([12])]; tensor attention_11_update_end_0_values1_0 = const()[name = string("attention_11_update_end_0_values1_0"), val = tensor([2])]; tensor attention_11_update_end_0_values3_0 = const()[name = string("attention_11_update_end_0_values3_0"), val = tensor([64])]; int32 attention_11_update_end_0_axis_0 = const()[name = string("attention_11_update_end_0_axis_0"), val = int32(0)]; bool attention_11_update_end_0_interleave_0 = const()[name = string("attention_11_update_end_0_interleave_0"), val = bool(false)]; tensor attention_11_update_end_0 = concat(axis = attention_11_update_end_0_axis_0, interleave = attention_11_update_end_0_interleave_0, values = (attention_11_update_end_0_values0_0, attention_11_update_end_0_values1_0, end_pos_0, attention_11_update_end_0_values3_0))[name = string("attention_11_update_end_0")]; tensor attention_11_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_11_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_11_updated_key_cache_0 = slice_update(begin = attention_11_update_begin_0, end = attention_11_update_end_0, squeeze_mask = attention_11_updated_key_cache_0_squeeze_mask_0, update = attention_11_k_rope, x = coreml_update_state_20)[name = string("attention_11_updated_key_cache_0")]; write_state(data = attention_11_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_70_write_state")]; tensor coreml_update_state_22 = read_state(input = key_cache_state)[name = string("coreml_update_state_70")]; tensor attention_11_key_cache_begin_0 = const()[name = string("attention_11_key_cache_begin_0"), val = tensor([11, 0, 0, 0])]; tensor attention_11_key_cache_end_0 = const()[name = string("attention_11_key_cache_end_0"), val = tensor([12, 2, 512, 64])]; tensor attention_11_key_cache_squeeze_mask_0 = const()[name = string("attention_11_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_11_key_cache = slice_by_index(begin = attention_11_key_cache_begin_0, end = attention_11_key_cache_end_0, squeeze_mask = attention_11_key_cache_squeeze_mask_0, x = coreml_update_state_22)[name = string("attention_11_key_cache")]; int32 attention_11_key_cache_head_axis_0 = const()[name = string("attention_11_key_cache_head_axis_0"), val = int32(1)]; int32 attention_11_key_cache_head_num_splits_0 = const()[name = string("attention_11_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_11_key_cache_head_0, tensor attention_11_key_cache_head_1 = split(axis = attention_11_key_cache_head_axis_0, 
num_splits = attention_11_key_cache_head_num_splits_0, x = attention_11_key_cache)[name = string("attention_11_key_cache_head")]; tensor attention_11_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_11_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_11_updated_value_cache_0 = slice_update(begin = attention_11_update_begin_0, end = attention_11_update_end_0, squeeze_mask = attention_11_updated_value_cache_0_squeeze_mask_0, update = attention_11_split_qkv_heads_2, x = coreml_update_state_21)[name = string("attention_11_updated_value_cache_0")]; write_state(data = attention_11_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_71_write_state")]; tensor coreml_update_state_23 = read_state(input = value_cache_state)[name = string("coreml_update_state_71")]; tensor attention_11_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_11_slice_current_layer_value_cache_begin_0"), val = tensor([11, 0, 0, 0])]; tensor attention_11_slice_current_layer_value_cache_end_0 = const()[name = string("attention_11_slice_current_layer_value_cache_end_0"), val = tensor([12, 2, 512, 64])]; tensor attention_11_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_11_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_11_slice_current_layer_value_cache = slice_by_index(begin = attention_11_slice_current_layer_value_cache_begin_0, end = attention_11_slice_current_layer_value_cache_end_0, squeeze_mask = attention_11_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_23)[name = string("attention_11_slice_current_layer_value_cache")]; int32 attention_11_slice_value_cache_heads_axis_0 = const()[name = string("attention_11_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_11_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_11_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_11_slice_value_cache_heads_0, tensor attention_11_slice_value_cache_heads_1 = split(axis = attention_11_slice_value_cache_heads_axis_0, num_splits = attention_11_slice_value_cache_heads_num_splits_0, x = attention_11_slice_current_layer_value_cache)[name = string("attention_11_slice_value_cache_heads")]; bool attention_11_scores_0_transpose_y_0 = const()[name = string("attention_11_scores_0_transpose_y_0"), val = bool(true)]; bool attention_11_scores_0_transpose_x_0 = const()[name = string("attention_11_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_11_scores_0 = matmul(transpose_x = attention_11_scores_0_transpose_x_0, transpose_y = attention_11_scores_0_transpose_y_0, x = attention_11_key_cache_head_0, y = attention_11_q_splits_0)[name = string("attention_11_scores_0")]; fp16 attention_11_scaled_scores_0_y_0 = const()[name = string("attention_11_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_11_scaled_scores_0 = mul(x = attention_11_scores_0, y = attention_11_scaled_scores_0_y_0)[name = string("attention_11_scaled_scores_0")]; tensor attention_11_masked_scaled_scores_0 = add(x = attention_11_scaled_scores_0, y = transpose_0)[name = string("attention_11_masked_scaled_scores_0")]; int32 softmax_22_axis_0 = const()[name = string("softmax_22_axis_0"), val = int32(-2)]; tensor softmax_22 = softmax(axis = softmax_22_axis_0, x = attention_11_masked_scaled_scores_0)[name = string("softmax_22")]; bool 
attention_11_attention_0_transpose_x_0 = const()[name = string("attention_11_attention_0_transpose_x_0"), val = bool(true)]; bool attention_11_attention_0_transpose_y_0 = const()[name = string("attention_11_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_11_attention_0 = matmul(transpose_x = attention_11_attention_0_transpose_x_0, transpose_y = attention_11_attention_0_transpose_y_0, x = softmax_22, y = attention_11_slice_value_cache_heads_0)[name = string("attention_11_attention_0")]; bool attention_11_scores_1_transpose_y_0 = const()[name = string("attention_11_scores_1_transpose_y_0"), val = bool(true)]; bool attention_11_scores_1_transpose_x_0 = const()[name = string("attention_11_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_11_scores_1 = matmul(transpose_x = attention_11_scores_1_transpose_x_0, transpose_y = attention_11_scores_1_transpose_y_0, x = attention_11_key_cache_head_1, y = attention_11_q_splits_1)[name = string("attention_11_scores_1")]; fp16 attention_11_scaled_scores_1_y_0 = const()[name = string("attention_11_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_11_scaled_scores_1 = mul(x = attention_11_scores_1, y = attention_11_scaled_scores_1_y_0)[name = string("attention_11_scaled_scores_1")]; tensor attention_11_masked_scaled_scores_1 = add(x = attention_11_scaled_scores_1, y = transpose_0)[name = string("attention_11_masked_scaled_scores_1")]; int32 softmax_23_axis_0 = const()[name = string("softmax_23_axis_0"), val = int32(-2)]; tensor softmax_23 = softmax(axis = softmax_23_axis_0, x = attention_11_masked_scaled_scores_1)[name = string("softmax_23")]; bool attention_11_attention_1_transpose_x_0 = const()[name = string("attention_11_attention_1_transpose_x_0"), val = bool(true)]; bool attention_11_attention_1_transpose_y_0 = const()[name = string("attention_11_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_11_attention_1 = matmul(transpose_x = attention_11_attention_1_transpose_x_0, transpose_y = attention_11_attention_1_transpose_y_0, x = softmax_23, y = attention_11_slice_value_cache_heads_1)[name = string("attention_11_attention_1")]; int32 attention_11_concat_attention_all_heads_axis_0 = const()[name = string("attention_11_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_11_concat_attention_all_heads_interleave_0 = const()[name = string("attention_11_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_11_concat_attention_all_heads = concat(axis = attention_11_concat_attention_all_heads_axis_0, interleave = attention_11_concat_attention_all_heads_interleave_0, values = (attention_11_attention_0, attention_11_attention_1))[name = string("attention_11_concat_attention_all_heads")]; tensor attention_11_channels_first_retransposed_perm_0 = const()[name = string("attention_11_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_11_reshaped_shape_0 = const()[name = string("attention_11_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_11_channels_first_retransposed = transpose(perm = attention_11_channels_first_retransposed_perm_0, x = attention_11_concat_attention_all_heads)[name = string("transpose_25")]; tensor attention_11_reshaped = reshape(shape = attention_11_reshaped_shape_0, x = attention_11_channels_first_retransposed)[name = string("attention_11_reshaped")]; tensor attention_11_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(419767680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420369856))))[name = string("attention_11_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_51 = constexpr_blockwise_shift_scale(data = attention_11_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420398592))))[name = string("constexpr_blockwise_shift_scale_51")]; tensor attention_11_outproj_strides_0 = const()[name = string("attention_11_outproj_strides_0"), val = tensor([1])]; string attention_11_outproj_pad_type_0 = const()[name = string("attention_11_outproj_pad_type_0"), val = string("valid")]; tensor attention_11_outproj_pad_0 = const()[name = string("attention_11_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_11_outproj_dilations_0 = const()[name = string("attention_11_outproj_dilations_0"), val = tensor([1])]; int32 attention_11_outproj_groups_0 = const()[name = string("attention_11_outproj_groups_0"), val = int32(1)]; tensor attention_11_outproj = conv(dilations = attention_11_outproj_dilations_0, groups = attention_11_outproj_groups_0, pad = attention_11_outproj_pad_0, pad_type = attention_11_outproj_pad_type_0, strides = attention_11_outproj_strides_0, weight = constexpr_blockwise_shift_scale_51, x = attention_11_reshaped)[name = string("attention_11_outproj")]; tensor block_11_residual_1 = add(x = block_10_residual_2, y = attention_11_outproj)[name = string("block_11_residual_1")]; tensor block_11_ffn_rmsnorm_abs = abs(x = block_11_residual_1)[name = string("block_11_ffn_rmsnorm_abs")]; tensor block_11_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_11_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_11_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_11_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_11_ffn_rmsnorm_maxval = reduce_max(axes = block_11_ffn_rmsnorm_maxval_axes_0, keep_dims = block_11_ffn_rmsnorm_maxval_keep_dims_0, x = block_11_ffn_rmsnorm_abs)[name = string("block_11_ffn_rmsnorm_maxval")]; fp16 block_11_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_11_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_11_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_11_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_11_ffn_rmsnorm_maxval_clipped = clip(alpha = block_11_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_11_ffn_rmsnorm_maxval_clipped_beta_0, x = block_11_ffn_rmsnorm_maxval)[name = string("block_11_ffn_rmsnorm_maxval_clipped")]; tensor block_11_ffn_rmsnorm_scaled = real_div(x = block_11_residual_1, y = block_11_ffn_rmsnorm_maxval_clipped)[name = string("block_11_ffn_rmsnorm_scaled")]; tensor block_11_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_11_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_11_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_11_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_11_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_11_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_11_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_11_ffn_rmsnorm_scaled)[name = string("block_11_ffn_rmsnorm_squared_sum")]; fp16 block_11_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_11_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_11_ffn_rmsnorm_rsqrt = rsqrt(epsilon = 
block_11_ffn_rmsnorm_rsqrt_epsilon_0, x = block_11_ffn_rmsnorm_squared_sum)[name = string("block_11_ffn_rmsnorm_rsqrt")]; fp16 block_11_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_11_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_11_ffn_rmsnorm_dim_scaled = mul(x = block_11_ffn_rmsnorm_scaled, y = block_11_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_11_ffn_rmsnorm_dim_scaled")]; tensor block_11_ffn_rmsnorm_normalized = mul(x = block_11_ffn_rmsnorm_dim_scaled, y = block_11_ffn_rmsnorm_rsqrt)[name = string("block_11_ffn_rmsnorm_normalized")]; tensor block_11_ffn_rmsnorm_y_0 = const()[name = string("block_11_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420400448)))]; tensor block_11_ffn_rmsnorm = mul(x = block_11_ffn_rmsnorm_normalized, y = block_11_ffn_rmsnorm_y_0)[name = string("block_11_ffn_rmsnorm")]; tensor block_11_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420402304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423670976))))[name = string("block_11_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_52 = constexpr_blockwise_shift_scale(data = block_11_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423826688))))[name = string("constexpr_blockwise_shift_scale_52")]; tensor block_11_ffn_inproj_strides_0 = const()[name = string("block_11_ffn_inproj_strides_0"), val = tensor([1])]; string block_11_ffn_inproj_pad_type_0 = const()[name = string("block_11_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_11_ffn_inproj_pad_0 = const()[name = string("block_11_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_11_ffn_inproj_dilations_0 = const()[name = string("block_11_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_11_ffn_inproj_groups_0 = const()[name = string("block_11_ffn_inproj_groups_0"), val = int32(1)]; tensor block_11_ffn_inproj = conv(dilations = block_11_ffn_inproj_dilations_0, groups = block_11_ffn_inproj_groups_0, pad = block_11_ffn_inproj_pad_0, pad_type = block_11_ffn_inproj_pad_type_0, strides = block_11_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_52, x = block_11_ffn_rmsnorm)[name = string("block_11_ffn_inproj")]; tensor block_11_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423836480))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427105152))))[name = string("block_11_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_53 = constexpr_blockwise_shift_scale(data = block_11_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427260864))))[name = string("constexpr_blockwise_shift_scale_53")]; tensor block_11_ffn_g_strides_0 = const()[name = string("block_11_ffn_g_strides_0"), val = tensor([1])]; string block_11_ffn_g_pad_type_0 = const()[name = string("block_11_ffn_g_pad_type_0"), val = string("valid")]; tensor block_11_ffn_g_pad_0 = const()[name = string("block_11_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_11_ffn_g_dilations_0 = const()[name = string("block_11_ffn_g_dilations_0"), val = tensor([1])]; int32 block_11_ffn_g_groups_0 = const()[name = 
string("block_11_ffn_g_groups_0"), val = int32(1)]; tensor block_11_ffn_g = conv(dilations = block_11_ffn_g_dilations_0, groups = block_11_ffn_g_groups_0, pad = block_11_ffn_g_pad_0, pad_type = block_11_ffn_g_pad_type_0, strides = block_11_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_53, x = block_11_ffn_rmsnorm)[name = string("block_11_ffn_g")]; tensor block_11_ffn_g_activation = silu(x = block_11_ffn_g)[name = string("block_11_ffn_g_activation")]; tensor block_11_ffn_x_gated = mul(x = block_11_ffn_inproj, y = block_11_ffn_g_activation)[name = string("block_11_ffn_x_gated")]; tensor block_11_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427270656))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430539328))))[name = string("block_11_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_54 = constexpr_blockwise_shift_scale(data = block_11_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430568064))))[name = string("constexpr_blockwise_shift_scale_54")]; tensor block_11_ffn_outproj_strides_0 = const()[name = string("block_11_ffn_outproj_strides_0"), val = tensor([1])]; string block_11_ffn_outproj_pad_type_0 = const()[name = string("block_11_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_11_ffn_outproj_pad_0 = const()[name = string("block_11_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_11_ffn_outproj_dilations_0 = const()[name = string("block_11_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_11_ffn_outproj_groups_0 = const()[name = string("block_11_ffn_outproj_groups_0"), val = int32(1)]; tensor block_11_ffn_outproj = conv(dilations = block_11_ffn_outproj_dilations_0, groups = block_11_ffn_outproj_groups_0, pad = block_11_ffn_outproj_pad_0, pad_type = block_11_ffn_outproj_pad_type_0, strides = block_11_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_54, x = block_11_ffn_x_gated)[name = string("block_11_ffn_outproj")]; tensor block_11_residual_2 = add(x = block_11_ffn_outproj, y = block_11_residual_1)[name = string("block_11_residual_2")]; tensor block_12_attention_rmsnorm_abs = abs(x = block_11_residual_2)[name = string("block_12_attention_rmsnorm_abs")]; tensor block_12_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_12_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_12_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_12_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_12_attention_rmsnorm_maxval = reduce_max(axes = block_12_attention_rmsnorm_maxval_axes_0, keep_dims = block_12_attention_rmsnorm_maxval_keep_dims_0, x = block_12_attention_rmsnorm_abs)[name = string("block_12_attention_rmsnorm_maxval")]; fp16 block_12_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_12_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_12_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_12_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_12_attention_rmsnorm_maxval_clipped = clip(alpha = block_12_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_12_attention_rmsnorm_maxval_clipped_beta_0, x = block_12_attention_rmsnorm_maxval)[name = string("block_12_attention_rmsnorm_maxval_clipped")]; tensor block_12_attention_rmsnorm_scaled = 
real_div(x = block_11_residual_2, y = block_12_attention_rmsnorm_maxval_clipped)[name = string("block_12_attention_rmsnorm_scaled")]; tensor block_12_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_12_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_12_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_12_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_12_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_12_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_12_attention_rmsnorm_squared_sum_keep_dims_0, x = block_12_attention_rmsnorm_scaled)[name = string("block_12_attention_rmsnorm_squared_sum")]; fp16 block_12_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_12_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_12_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_12_attention_rmsnorm_rsqrt_epsilon_0, x = block_12_attention_rmsnorm_squared_sum)[name = string("block_12_attention_rmsnorm_rsqrt")]; fp16 block_12_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_12_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_12_attention_rmsnorm_dim_scaled = mul(x = block_12_attention_rmsnorm_scaled, y = block_12_attention_rmsnorm_dim_scaled_y_0)[name = string("block_12_attention_rmsnorm_dim_scaled")]; tensor block_12_attention_rmsnorm_normalized = mul(x = block_12_attention_rmsnorm_dim_scaled, y = block_12_attention_rmsnorm_rsqrt)[name = string("block_12_attention_rmsnorm_normalized")]; tensor block_12_attention_rmsnorm_y_0 = const()[name = string("block_12_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430569920)))]; tensor block_12_attention_rmsnorm = mul(x = block_12_attention_rmsnorm_normalized, y = block_12_attention_rmsnorm_y_0)[name = string("block_12_attention_rmsnorm")]; tensor attention_12_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430571776))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431345984))))[name = string("attention_12_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_55 = constexpr_blockwise_shift_scale(data = attention_12_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431382912))))[name = string("constexpr_blockwise_shift_scale_55")]; tensor attention_12_qkvproj_bias_0 = const()[name = string("attention_12_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431385280)))]; tensor attention_12_qkvproj_strides_0 = const()[name = string("attention_12_qkvproj_strides_0"), val = tensor([1])]; string attention_12_qkvproj_pad_type_0 = const()[name = string("attention_12_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_12_qkvproj_pad_0 = const()[name = string("attention_12_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_12_qkvproj_dilations_0 = const()[name = string("attention_12_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_12_qkvproj_groups_0 = const()[name = string("attention_12_qkvproj_groups_0"), val = int32(1)]; tensor attention_12_qkvproj = conv(bias = attention_12_qkvproj_bias_0, dilations = attention_12_qkvproj_dilations_0, groups = attention_12_qkvproj_groups_0, pad = attention_12_qkvproj_pad_0, pad_type = 
attention_12_qkvproj_pad_type_0, strides = attention_12_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_55, x = block_12_attention_rmsnorm)[name = string("attention_12_qkvproj")]; tensor attention_12_head_reshape_shape_0 = const()[name = string("attention_12_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_12_head_reshape = reshape(shape = attention_12_head_reshape_shape_0, x = attention_12_qkvproj)[name = string("attention_12_head_reshape")]; tensor attention_12_head_transpose_perm_0 = const()[name = string("attention_12_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_12_split_qkv_heads_axis_0 = const()[name = string("attention_12_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_12_split_qkv_heads_split_sizes_0 = const()[name = string("attention_12_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_12_head_transpose = transpose(perm = attention_12_head_transpose_perm_0, x = attention_12_head_reshape)[name = string("transpose_24")]; tensor attention_12_split_qkv_heads_0, tensor attention_12_split_qkv_heads_1, tensor attention_12_split_qkv_heads_2 = split(axis = attention_12_split_qkv_heads_axis_0, split_sizes = attention_12_split_qkv_heads_split_sizes_0, x = attention_12_head_transpose)[name = string("attention_12_split_qkv_heads")]; tensor attention_12_q_rope_lhs_mult = mul(x = attention_12_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_12_q_rope_lhs_mult")]; int32 attention_12_q_rotate_half_split_num_splits_0 = const()[name = string("attention_12_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_12_q_rotate_half_split_axis_0 = const()[name = string("attention_12_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_12_q_rotate_half_split_0, tensor attention_12_q_rotate_half_split_1 = split(axis = attention_12_q_rotate_half_split_axis_0, num_splits = attention_12_q_rotate_half_split_num_splits_0, x = attention_12_split_qkv_heads_0)[name = string("attention_12_q_rotate_half_split")]; fp16 attention_12_q_rotate_half_neg_y_0 = const()[name = string("attention_12_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_12_q_rotate_half_neg = mul(x = attention_12_q_rotate_half_split_1, y = attention_12_q_rotate_half_neg_y_0)[name = string("attention_12_q_rotate_half_neg")]; int32 attention_12_q_rotate_half_concat_axis_0 = const()[name = string("attention_12_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_12_q_rotate_half_concat_interleave_0 = const()[name = string("attention_12_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_12_q_rotate_half_concat = concat(axis = attention_12_q_rotate_half_concat_axis_0, interleave = attention_12_q_rotate_half_concat_interleave_0, values = (attention_12_q_rotate_half_neg, attention_12_q_rotate_half_split_0))[name = string("attention_12_q_rotate_half_concat")]; tensor attention_12_q_rope_rhs_mult = mul(x = attention_12_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_12_q_rope_rhs_mult")]; tensor attention_12_q_rope = add(x = attention_12_q_rope_lhs_mult, y = attention_12_q_rope_rhs_mult)[name = string("attention_12_q_rope")]; tensor attention_12_k_rope_lhs_mult = mul(x = attention_12_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_12_k_rope_lhs_mult")]; int32 attention_12_k_rotate_half_split_num_splits_0 = const()[name = string("attention_12_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_12_k_rotate_half_split_axis_0 = 
const()[name = string("attention_12_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_12_k_rotate_half_split_0, tensor attention_12_k_rotate_half_split_1 = split(axis = attention_12_k_rotate_half_split_axis_0, num_splits = attention_12_k_rotate_half_split_num_splits_0, x = attention_12_split_qkv_heads_1)[name = string("attention_12_k_rotate_half_split")]; fp16 attention_12_k_rotate_half_neg_y_0 = const()[name = string("attention_12_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_12_k_rotate_half_neg = mul(x = attention_12_k_rotate_half_split_1, y = attention_12_k_rotate_half_neg_y_0)[name = string("attention_12_k_rotate_half_neg")]; int32 attention_12_k_rotate_half_concat_axis_0 = const()[name = string("attention_12_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_12_k_rotate_half_concat_interleave_0 = const()[name = string("attention_12_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_12_k_rotate_half_concat = concat(axis = attention_12_k_rotate_half_concat_axis_0, interleave = attention_12_k_rotate_half_concat_interleave_0, values = (attention_12_k_rotate_half_neg, attention_12_k_rotate_half_split_0))[name = string("attention_12_k_rotate_half_concat")]; tensor attention_12_k_rope_rhs_mult = mul(x = attention_12_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_12_k_rope_rhs_mult")]; tensor attention_12_k_rope = add(x = attention_12_k_rope_lhs_mult, y = attention_12_k_rope_rhs_mult)[name = string("attention_12_k_rope")]; int32 attention_12_q_splits_axis_0 = const()[name = string("attention_12_q_splits_axis_0"), val = int32(1)]; int32 attention_12_q_splits_num_splits_0 = const()[name = string("attention_12_q_splits_num_splits_0"), val = int32(2)]; tensor attention_12_q_splits_0, tensor attention_12_q_splits_1 = split(axis = attention_12_q_splits_axis_0, num_splits = attention_12_q_splits_num_splits_0, x = attention_12_q_rope)[name = string("attention_12_q_splits")]; tensor attention_12_update_begin_0_values0_0 = const()[name = string("attention_12_update_begin_0_values0_0"), val = tensor([12])]; tensor attention_12_update_begin_0_values1_0 = const()[name = string("attention_12_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_12_update_begin_0_values3_0 = const()[name = string("attention_12_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_12_update_begin_0_axis_0 = const()[name = string("attention_12_update_begin_0_axis_0"), val = int32(0)]; bool attention_12_update_begin_0_interleave_0 = const()[name = string("attention_12_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_12_update_begin_0 = concat(axis = attention_12_update_begin_0_axis_0, interleave = attention_12_update_begin_0_interleave_0, values = (attention_12_update_begin_0_values0_0, attention_12_update_begin_0_values1_0, query_pos1, attention_12_update_begin_0_values3_0))[name = string("attention_12_update_begin_0")]; tensor attention_12_update_end_0_values0_0 = const()[name = string("attention_12_update_end_0_values0_0"), val = tensor([13])]; tensor attention_12_update_end_0_values1_0 = const()[name = string("attention_12_update_end_0_values1_0"), val = tensor([2])]; tensor attention_12_update_end_0_values3_0 = const()[name = string("attention_12_update_end_0_values3_0"), val = tensor([64])]; int32 attention_12_update_end_0_axis_0 = const()[name = string("attention_12_update_end_0_axis_0"), val = int32(0)]; bool attention_12_update_end_0_interleave_0 = const()[name = 
string("attention_12_update_end_0_interleave_0"), val = bool(false)]; tensor attention_12_update_end_0 = concat(axis = attention_12_update_end_0_axis_0, interleave = attention_12_update_end_0_interleave_0, values = (attention_12_update_end_0_values0_0, attention_12_update_end_0_values1_0, end_pos_0, attention_12_update_end_0_values3_0))[name = string("attention_12_update_end_0")]; tensor attention_12_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_12_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_12_updated_key_cache_0 = slice_update(begin = attention_12_update_begin_0, end = attention_12_update_end_0, squeeze_mask = attention_12_updated_key_cache_0_squeeze_mask_0, update = attention_12_k_rope, x = coreml_update_state_22)[name = string("attention_12_updated_key_cache_0")]; write_state(data = attention_12_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_72_write_state")]; tensor coreml_update_state_24 = read_state(input = key_cache_state)[name = string("coreml_update_state_72")]; tensor attention_12_key_cache_begin_0 = const()[name = string("attention_12_key_cache_begin_0"), val = tensor([12, 0, 0, 0])]; tensor attention_12_key_cache_end_0 = const()[name = string("attention_12_key_cache_end_0"), val = tensor([13, 2, 512, 64])]; tensor attention_12_key_cache_squeeze_mask_0 = const()[name = string("attention_12_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_12_key_cache = slice_by_index(begin = attention_12_key_cache_begin_0, end = attention_12_key_cache_end_0, squeeze_mask = attention_12_key_cache_squeeze_mask_0, x = coreml_update_state_24)[name = string("attention_12_key_cache")]; int32 attention_12_key_cache_head_axis_0 = const()[name = string("attention_12_key_cache_head_axis_0"), val = int32(1)]; int32 attention_12_key_cache_head_num_splits_0 = const()[name = string("attention_12_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_12_key_cache_head_0, tensor attention_12_key_cache_head_1 = split(axis = attention_12_key_cache_head_axis_0, num_splits = attention_12_key_cache_head_num_splits_0, x = attention_12_key_cache)[name = string("attention_12_key_cache_head")]; tensor attention_12_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_12_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_12_updated_value_cache_0 = slice_update(begin = attention_12_update_begin_0, end = attention_12_update_end_0, squeeze_mask = attention_12_updated_value_cache_0_squeeze_mask_0, update = attention_12_split_qkv_heads_2, x = coreml_update_state_23)[name = string("attention_12_updated_value_cache_0")]; write_state(data = attention_12_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_73_write_state")]; tensor coreml_update_state_25 = read_state(input = value_cache_state)[name = string("coreml_update_state_73")]; tensor attention_12_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_12_slice_current_layer_value_cache_begin_0"), val = tensor([12, 0, 0, 0])]; tensor attention_12_slice_current_layer_value_cache_end_0 = const()[name = string("attention_12_slice_current_layer_value_cache_end_0"), val = tensor([13, 2, 512, 64])]; tensor attention_12_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_12_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; 
tensor attention_12_slice_current_layer_value_cache = slice_by_index(begin = attention_12_slice_current_layer_value_cache_begin_0, end = attention_12_slice_current_layer_value_cache_end_0, squeeze_mask = attention_12_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_25)[name = string("attention_12_slice_current_layer_value_cache")]; int32 attention_12_slice_value_cache_heads_axis_0 = const()[name = string("attention_12_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_12_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_12_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_12_slice_value_cache_heads_0, tensor attention_12_slice_value_cache_heads_1 = split(axis = attention_12_slice_value_cache_heads_axis_0, num_splits = attention_12_slice_value_cache_heads_num_splits_0, x = attention_12_slice_current_layer_value_cache)[name = string("attention_12_slice_value_cache_heads")]; bool attention_12_scores_0_transpose_y_0 = const()[name = string("attention_12_scores_0_transpose_y_0"), val = bool(true)]; bool attention_12_scores_0_transpose_x_0 = const()[name = string("attention_12_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_12_scores_0 = matmul(transpose_x = attention_12_scores_0_transpose_x_0, transpose_y = attention_12_scores_0_transpose_y_0, x = attention_12_key_cache_head_0, y = attention_12_q_splits_0)[name = string("attention_12_scores_0")]; fp16 attention_12_scaled_scores_0_y_0 = const()[name = string("attention_12_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_12_scaled_scores_0 = mul(x = attention_12_scores_0, y = attention_12_scaled_scores_0_y_0)[name = string("attention_12_scaled_scores_0")]; tensor attention_12_masked_scaled_scores_0 = add(x = attention_12_scaled_scores_0, y = transpose_0)[name = string("attention_12_masked_scaled_scores_0")]; int32 softmax_24_axis_0 = const()[name = string("softmax_24_axis_0"), val = int32(-2)]; tensor softmax_24 = softmax(axis = softmax_24_axis_0, x = attention_12_masked_scaled_scores_0)[name = string("softmax_24")]; bool attention_12_attention_0_transpose_x_0 = const()[name = string("attention_12_attention_0_transpose_x_0"), val = bool(true)]; bool attention_12_attention_0_transpose_y_0 = const()[name = string("attention_12_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_12_attention_0 = matmul(transpose_x = attention_12_attention_0_transpose_x_0, transpose_y = attention_12_attention_0_transpose_y_0, x = softmax_24, y = attention_12_slice_value_cache_heads_0)[name = string("attention_12_attention_0")]; bool attention_12_scores_1_transpose_y_0 = const()[name = string("attention_12_scores_1_transpose_y_0"), val = bool(true)]; bool attention_12_scores_1_transpose_x_0 = const()[name = string("attention_12_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_12_scores_1 = matmul(transpose_x = attention_12_scores_1_transpose_x_0, transpose_y = attention_12_scores_1_transpose_y_0, x = attention_12_key_cache_head_1, y = attention_12_q_splits_1)[name = string("attention_12_scores_1")]; fp16 attention_12_scaled_scores_1_y_0 = const()[name = string("attention_12_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_12_scaled_scores_1 = mul(x = attention_12_scores_1, y = attention_12_scaled_scores_1_y_0)[name = string("attention_12_scaled_scores_1")]; tensor attention_12_masked_scaled_scores_1 = add(x = attention_12_scaled_scores_1, y = transpose_0)[name = string("attention_12_masked_scaled_scores_1")]; int32 
softmax_25_axis_0 = const()[name = string("softmax_25_axis_0"), val = int32(-2)]; tensor softmax_25 = softmax(axis = softmax_25_axis_0, x = attention_12_masked_scaled_scores_1)[name = string("softmax_25")]; bool attention_12_attention_1_transpose_x_0 = const()[name = string("attention_12_attention_1_transpose_x_0"), val = bool(true)]; bool attention_12_attention_1_transpose_y_0 = const()[name = string("attention_12_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_12_attention_1 = matmul(transpose_x = attention_12_attention_1_transpose_x_0, transpose_y = attention_12_attention_1_transpose_y_0, x = softmax_25, y = attention_12_slice_value_cache_heads_1)[name = string("attention_12_attention_1")]; int32 attention_12_concat_attention_all_heads_axis_0 = const()[name = string("attention_12_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_12_concat_attention_all_heads_interleave_0 = const()[name = string("attention_12_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_12_concat_attention_all_heads = concat(axis = attention_12_concat_attention_all_heads_axis_0, interleave = attention_12_concat_attention_all_heads_interleave_0, values = (attention_12_attention_0, attention_12_attention_1))[name = string("attention_12_concat_attention_all_heads")]; tensor attention_12_channels_first_retransposed_perm_0 = const()[name = string("attention_12_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_12_reshaped_shape_0 = const()[name = string("attention_12_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_12_channels_first_retransposed = transpose(perm = attention_12_channels_first_retransposed_perm_0, x = attention_12_concat_attention_all_heads)[name = string("transpose_23")]; tensor attention_12_reshaped = reshape(shape = attention_12_reshaped_shape_0, x = attention_12_channels_first_retransposed)[name = string("attention_12_reshaped")]; tensor attention_12_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431387648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(431989824))))[name = string("attention_12_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_56 = constexpr_blockwise_shift_scale(data = attention_12_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432018560))))[name = string("constexpr_blockwise_shift_scale_56")]; tensor attention_12_outproj_strides_0 = const()[name = string("attention_12_outproj_strides_0"), val = tensor([1])]; string attention_12_outproj_pad_type_0 = const()[name = string("attention_12_outproj_pad_type_0"), val = string("valid")]; tensor attention_12_outproj_pad_0 = const()[name = string("attention_12_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_12_outproj_dilations_0 = const()[name = string("attention_12_outproj_dilations_0"), val = tensor([1])]; int32 attention_12_outproj_groups_0 = const()[name = string("attention_12_outproj_groups_0"), val = int32(1)]; tensor attention_12_outproj = conv(dilations = attention_12_outproj_dilations_0, groups = attention_12_outproj_groups_0, pad = attention_12_outproj_pad_0, pad_type = attention_12_outproj_pad_type_0, strides = attention_12_outproj_strides_0, weight = constexpr_blockwise_shift_scale_56, x = attention_12_reshaped)[name = string("attention_12_outproj")]; tensor 
block_12_residual_1 = add(x = block_11_residual_2, y = attention_12_outproj)[name = string("block_12_residual_1")]; tensor block_12_ffn_rmsnorm_abs = abs(x = block_12_residual_1)[name = string("block_12_ffn_rmsnorm_abs")]; tensor block_12_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_12_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_12_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_12_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_12_ffn_rmsnorm_maxval = reduce_max(axes = block_12_ffn_rmsnorm_maxval_axes_0, keep_dims = block_12_ffn_rmsnorm_maxval_keep_dims_0, x = block_12_ffn_rmsnorm_abs)[name = string("block_12_ffn_rmsnorm_maxval")]; fp16 block_12_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_12_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_12_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_12_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_12_ffn_rmsnorm_maxval_clipped = clip(alpha = block_12_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_12_ffn_rmsnorm_maxval_clipped_beta_0, x = block_12_ffn_rmsnorm_maxval)[name = string("block_12_ffn_rmsnorm_maxval_clipped")]; tensor block_12_ffn_rmsnorm_scaled = real_div(x = block_12_residual_1, y = block_12_ffn_rmsnorm_maxval_clipped)[name = string("block_12_ffn_rmsnorm_scaled")]; tensor block_12_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_12_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_12_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_12_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_12_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_12_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_12_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_12_ffn_rmsnorm_scaled)[name = string("block_12_ffn_rmsnorm_squared_sum")]; fp16 block_12_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_12_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_12_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_12_ffn_rmsnorm_rsqrt_epsilon_0, x = block_12_ffn_rmsnorm_squared_sum)[name = string("block_12_ffn_rmsnorm_rsqrt")]; fp16 block_12_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_12_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_12_ffn_rmsnorm_dim_scaled = mul(x = block_12_ffn_rmsnorm_scaled, y = block_12_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_12_ffn_rmsnorm_dim_scaled")]; tensor block_12_ffn_rmsnorm_normalized = mul(x = block_12_ffn_rmsnorm_dim_scaled, y = block_12_ffn_rmsnorm_rsqrt)[name = string("block_12_ffn_rmsnorm_normalized")]; tensor block_12_ffn_rmsnorm_y_0 = const()[name = string("block_12_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432020416)))]; tensor block_12_ffn_rmsnorm = mul(x = block_12_ffn_rmsnorm_normalized, y = block_12_ffn_rmsnorm_y_0)[name = string("block_12_ffn_rmsnorm")]; tensor block_12_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(432022272))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(435290944))))[name = string("block_12_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_57 = constexpr_blockwise_shift_scale(data = block_12_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = 
uint64(435446656))))[name = string("constexpr_blockwise_shift_scale_57")]; tensor block_12_ffn_inproj_strides_0 = const()[name = string("block_12_ffn_inproj_strides_0"), val = tensor([1])]; string block_12_ffn_inproj_pad_type_0 = const()[name = string("block_12_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_12_ffn_inproj_pad_0 = const()[name = string("block_12_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_12_ffn_inproj_dilations_0 = const()[name = string("block_12_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_12_ffn_inproj_groups_0 = const()[name = string("block_12_ffn_inproj_groups_0"), val = int32(1)]; tensor block_12_ffn_inproj = conv(dilations = block_12_ffn_inproj_dilations_0, groups = block_12_ffn_inproj_groups_0, pad = block_12_ffn_inproj_pad_0, pad_type = block_12_ffn_inproj_pad_type_0, strides = block_12_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_57, x = block_12_ffn_rmsnorm)[name = string("block_12_ffn_inproj")]; tensor block_12_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(435456448))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438725120))))[name = string("block_12_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_58 = constexpr_blockwise_shift_scale(data = block_12_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438880832))))[name = string("constexpr_blockwise_shift_scale_58")]; tensor block_12_ffn_g_strides_0 = const()[name = string("block_12_ffn_g_strides_0"), val = tensor([1])]; string block_12_ffn_g_pad_type_0 = const()[name = string("block_12_ffn_g_pad_type_0"), val = string("valid")]; tensor block_12_ffn_g_pad_0 = const()[name = string("block_12_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_12_ffn_g_dilations_0 = const()[name = string("block_12_ffn_g_dilations_0"), val = tensor([1])]; int32 block_12_ffn_g_groups_0 = const()[name = string("block_12_ffn_g_groups_0"), val = int32(1)]; tensor block_12_ffn_g = conv(dilations = block_12_ffn_g_dilations_0, groups = block_12_ffn_g_groups_0, pad = block_12_ffn_g_pad_0, pad_type = block_12_ffn_g_pad_type_0, strides = block_12_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_58, x = block_12_ffn_rmsnorm)[name = string("block_12_ffn_g")]; tensor block_12_ffn_g_activation = silu(x = block_12_ffn_g)[name = string("block_12_ffn_g_activation")]; tensor block_12_ffn_x_gated = mul(x = block_12_ffn_inproj, y = block_12_ffn_g_activation)[name = string("block_12_ffn_x_gated")]; tensor block_12_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(438890624))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442159296))))[name = string("block_12_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_59 = constexpr_blockwise_shift_scale(data = block_12_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442188032))))[name = string("constexpr_blockwise_shift_scale_59")]; tensor block_12_ffn_outproj_strides_0 = const()[name = string("block_12_ffn_outproj_strides_0"), val = tensor([1])]; string block_12_ffn_outproj_pad_type_0 = const()[name = string("block_12_ffn_outproj_pad_type_0"), val = string("valid")]; tensor 
block_12_ffn_outproj_pad_0 = const()[name = string("block_12_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_12_ffn_outproj_dilations_0 = const()[name = string("block_12_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_12_ffn_outproj_groups_0 = const()[name = string("block_12_ffn_outproj_groups_0"), val = int32(1)]; tensor block_12_ffn_outproj = conv(dilations = block_12_ffn_outproj_dilations_0, groups = block_12_ffn_outproj_groups_0, pad = block_12_ffn_outproj_pad_0, pad_type = block_12_ffn_outproj_pad_type_0, strides = block_12_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_59, x = block_12_ffn_x_gated)[name = string("block_12_ffn_outproj")]; tensor block_12_residual_2 = add(x = block_12_ffn_outproj, y = block_12_residual_1)[name = string("block_12_residual_2")]; tensor block_13_attention_rmsnorm_abs = abs(x = block_12_residual_2)[name = string("block_13_attention_rmsnorm_abs")]; tensor block_13_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_13_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_13_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_13_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_13_attention_rmsnorm_maxval = reduce_max(axes = block_13_attention_rmsnorm_maxval_axes_0, keep_dims = block_13_attention_rmsnorm_maxval_keep_dims_0, x = block_13_attention_rmsnorm_abs)[name = string("block_13_attention_rmsnorm_maxval")]; fp16 block_13_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_13_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_13_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_13_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_13_attention_rmsnorm_maxval_clipped = clip(alpha = block_13_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_13_attention_rmsnorm_maxval_clipped_beta_0, x = block_13_attention_rmsnorm_maxval)[name = string("block_13_attention_rmsnorm_maxval_clipped")]; tensor block_13_attention_rmsnorm_scaled = real_div(x = block_12_residual_2, y = block_13_attention_rmsnorm_maxval_clipped)[name = string("block_13_attention_rmsnorm_scaled")]; tensor block_13_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_13_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_13_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_13_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_13_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_13_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_13_attention_rmsnorm_squared_sum_keep_dims_0, x = block_13_attention_rmsnorm_scaled)[name = string("block_13_attention_rmsnorm_squared_sum")]; fp16 block_13_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_13_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_13_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_13_attention_rmsnorm_rsqrt_epsilon_0, x = block_13_attention_rmsnorm_squared_sum)[name = string("block_13_attention_rmsnorm_rsqrt")]; fp16 block_13_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_13_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_13_attention_rmsnorm_dim_scaled = mul(x = block_13_attention_rmsnorm_scaled, y = block_13_attention_rmsnorm_dim_scaled_y_0)[name = string("block_13_attention_rmsnorm_dim_scaled")]; tensor block_13_attention_rmsnorm_normalized = mul(x = 
block_13_attention_rmsnorm_dim_scaled, y = block_13_attention_rmsnorm_rsqrt)[name = string("block_13_attention_rmsnorm_normalized")]; tensor block_13_attention_rmsnorm_y_0 = const()[name = string("block_13_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442189888)))]; tensor block_13_attention_rmsnorm = mul(x = block_13_attention_rmsnorm_normalized, y = block_13_attention_rmsnorm_y_0)[name = string("block_13_attention_rmsnorm")]; tensor attention_13_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442191744))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(442965952))))[name = string("attention_13_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_60 = constexpr_blockwise_shift_scale(data = attention_13_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443002880))))[name = string("constexpr_blockwise_shift_scale_60")]; tensor attention_13_qkvproj_bias_0 = const()[name = string("attention_13_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443005248)))]; tensor attention_13_qkvproj_strides_0 = const()[name = string("attention_13_qkvproj_strides_0"), val = tensor([1])]; string attention_13_qkvproj_pad_type_0 = const()[name = string("attention_13_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_13_qkvproj_pad_0 = const()[name = string("attention_13_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_13_qkvproj_dilations_0 = const()[name = string("attention_13_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_13_qkvproj_groups_0 = const()[name = string("attention_13_qkvproj_groups_0"), val = int32(1)]; tensor attention_13_qkvproj = conv(bias = attention_13_qkvproj_bias_0, dilations = attention_13_qkvproj_dilations_0, groups = attention_13_qkvproj_groups_0, pad = attention_13_qkvproj_pad_0, pad_type = attention_13_qkvproj_pad_type_0, strides = attention_13_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_60, x = block_13_attention_rmsnorm)[name = string("attention_13_qkvproj")]; tensor attention_13_head_reshape_shape_0 = const()[name = string("attention_13_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_13_head_reshape = reshape(shape = attention_13_head_reshape_shape_0, x = attention_13_qkvproj)[name = string("attention_13_head_reshape")]; tensor attention_13_head_transpose_perm_0 = const()[name = string("attention_13_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_13_split_qkv_heads_axis_0 = const()[name = string("attention_13_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_13_split_qkv_heads_split_sizes_0 = const()[name = string("attention_13_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_13_head_transpose = transpose(perm = attention_13_head_transpose_perm_0, x = attention_13_head_reshape)[name = string("transpose_22")]; tensor attention_13_split_qkv_heads_0, tensor attention_13_split_qkv_heads_1, tensor attention_13_split_qkv_heads_2 = split(axis = attention_13_split_qkv_heads_axis_0, split_sizes = attention_13_split_qkv_heads_split_sizes_0, x = attention_13_head_transpose)[name = string("attention_13_split_qkv_heads")]; tensor attention_13_q_rope_lhs_mult = mul(x = attention_13_split_qkv_heads_0, y = query_cos_emb)[name 
= string("attention_13_q_rope_lhs_mult")]; int32 attention_13_q_rotate_half_split_num_splits_0 = const()[name = string("attention_13_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_13_q_rotate_half_split_axis_0 = const()[name = string("attention_13_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_13_q_rotate_half_split_0, tensor attention_13_q_rotate_half_split_1 = split(axis = attention_13_q_rotate_half_split_axis_0, num_splits = attention_13_q_rotate_half_split_num_splits_0, x = attention_13_split_qkv_heads_0)[name = string("attention_13_q_rotate_half_split")]; fp16 attention_13_q_rotate_half_neg_y_0 = const()[name = string("attention_13_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_13_q_rotate_half_neg = mul(x = attention_13_q_rotate_half_split_1, y = attention_13_q_rotate_half_neg_y_0)[name = string("attention_13_q_rotate_half_neg")]; int32 attention_13_q_rotate_half_concat_axis_0 = const()[name = string("attention_13_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_13_q_rotate_half_concat_interleave_0 = const()[name = string("attention_13_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_13_q_rotate_half_concat = concat(axis = attention_13_q_rotate_half_concat_axis_0, interleave = attention_13_q_rotate_half_concat_interleave_0, values = (attention_13_q_rotate_half_neg, attention_13_q_rotate_half_split_0))[name = string("attention_13_q_rotate_half_concat")]; tensor attention_13_q_rope_rhs_mult = mul(x = attention_13_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_13_q_rope_rhs_mult")]; tensor attention_13_q_rope = add(x = attention_13_q_rope_lhs_mult, y = attention_13_q_rope_rhs_mult)[name = string("attention_13_q_rope")]; tensor attention_13_k_rope_lhs_mult = mul(x = attention_13_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_13_k_rope_lhs_mult")]; int32 attention_13_k_rotate_half_split_num_splits_0 = const()[name = string("attention_13_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_13_k_rotate_half_split_axis_0 = const()[name = string("attention_13_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_13_k_rotate_half_split_0, tensor attention_13_k_rotate_half_split_1 = split(axis = attention_13_k_rotate_half_split_axis_0, num_splits = attention_13_k_rotate_half_split_num_splits_0, x = attention_13_split_qkv_heads_1)[name = string("attention_13_k_rotate_half_split")]; fp16 attention_13_k_rotate_half_neg_y_0 = const()[name = string("attention_13_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_13_k_rotate_half_neg = mul(x = attention_13_k_rotate_half_split_1, y = attention_13_k_rotate_half_neg_y_0)[name = string("attention_13_k_rotate_half_neg")]; int32 attention_13_k_rotate_half_concat_axis_0 = const()[name = string("attention_13_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_13_k_rotate_half_concat_interleave_0 = const()[name = string("attention_13_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_13_k_rotate_half_concat = concat(axis = attention_13_k_rotate_half_concat_axis_0, interleave = attention_13_k_rotate_half_concat_interleave_0, values = (attention_13_k_rotate_half_neg, attention_13_k_rotate_half_split_0))[name = string("attention_13_k_rotate_half_concat")]; tensor attention_13_k_rope_rhs_mult = mul(x = attention_13_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_13_k_rope_rhs_mult")]; tensor attention_13_k_rope = add(x = 
attention_13_k_rope_lhs_mult, y = attention_13_k_rope_rhs_mult)[name = string("attention_13_k_rope")]; int32 attention_13_q_splits_axis_0 = const()[name = string("attention_13_q_splits_axis_0"), val = int32(1)]; int32 attention_13_q_splits_num_splits_0 = const()[name = string("attention_13_q_splits_num_splits_0"), val = int32(2)]; tensor attention_13_q_splits_0, tensor attention_13_q_splits_1 = split(axis = attention_13_q_splits_axis_0, num_splits = attention_13_q_splits_num_splits_0, x = attention_13_q_rope)[name = string("attention_13_q_splits")]; tensor attention_13_update_begin_0_values0_0 = const()[name = string("attention_13_update_begin_0_values0_0"), val = tensor([13])]; tensor attention_13_update_begin_0_values1_0 = const()[name = string("attention_13_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_13_update_begin_0_values3_0 = const()[name = string("attention_13_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_13_update_begin_0_axis_0 = const()[name = string("attention_13_update_begin_0_axis_0"), val = int32(0)]; bool attention_13_update_begin_0_interleave_0 = const()[name = string("attention_13_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_13_update_begin_0 = concat(axis = attention_13_update_begin_0_axis_0, interleave = attention_13_update_begin_0_interleave_0, values = (attention_13_update_begin_0_values0_0, attention_13_update_begin_0_values1_0, query_pos1, attention_13_update_begin_0_values3_0))[name = string("attention_13_update_begin_0")]; tensor attention_13_update_end_0_values0_0 = const()[name = string("attention_13_update_end_0_values0_0"), val = tensor([14])]; tensor attention_13_update_end_0_values1_0 = const()[name = string("attention_13_update_end_0_values1_0"), val = tensor([2])]; tensor attention_13_update_end_0_values3_0 = const()[name = string("attention_13_update_end_0_values3_0"), val = tensor([64])]; int32 attention_13_update_end_0_axis_0 = const()[name = string("attention_13_update_end_0_axis_0"), val = int32(0)]; bool attention_13_update_end_0_interleave_0 = const()[name = string("attention_13_update_end_0_interleave_0"), val = bool(false)]; tensor attention_13_update_end_0 = concat(axis = attention_13_update_end_0_axis_0, interleave = attention_13_update_end_0_interleave_0, values = (attention_13_update_end_0_values0_0, attention_13_update_end_0_values1_0, end_pos_0, attention_13_update_end_0_values3_0))[name = string("attention_13_update_end_0")]; tensor attention_13_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_13_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_13_updated_key_cache_0 = slice_update(begin = attention_13_update_begin_0, end = attention_13_update_end_0, squeeze_mask = attention_13_updated_key_cache_0_squeeze_mask_0, update = attention_13_k_rope, x = coreml_update_state_24)[name = string("attention_13_updated_key_cache_0")]; write_state(data = attention_13_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_74_write_state")]; tensor coreml_update_state_26 = read_state(input = key_cache_state)[name = string("coreml_update_state_74")]; tensor attention_13_key_cache_begin_0 = const()[name = string("attention_13_key_cache_begin_0"), val = tensor([13, 0, 0, 0])]; tensor attention_13_key_cache_end_0 = const()[name = string("attention_13_key_cache_end_0"), val = tensor([14, 2, 512, 64])]; tensor attention_13_key_cache_squeeze_mask_0 = const()[name = 
string("attention_13_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_13_key_cache = slice_by_index(begin = attention_13_key_cache_begin_0, end = attention_13_key_cache_end_0, squeeze_mask = attention_13_key_cache_squeeze_mask_0, x = coreml_update_state_26)[name = string("attention_13_key_cache")]; int32 attention_13_key_cache_head_axis_0 = const()[name = string("attention_13_key_cache_head_axis_0"), val = int32(1)]; int32 attention_13_key_cache_head_num_splits_0 = const()[name = string("attention_13_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_13_key_cache_head_0, tensor attention_13_key_cache_head_1 = split(axis = attention_13_key_cache_head_axis_0, num_splits = attention_13_key_cache_head_num_splits_0, x = attention_13_key_cache)[name = string("attention_13_key_cache_head")]; tensor attention_13_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_13_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_13_updated_value_cache_0 = slice_update(begin = attention_13_update_begin_0, end = attention_13_update_end_0, squeeze_mask = attention_13_updated_value_cache_0_squeeze_mask_0, update = attention_13_split_qkv_heads_2, x = coreml_update_state_25)[name = string("attention_13_updated_value_cache_0")]; write_state(data = attention_13_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_75_write_state")]; tensor coreml_update_state_27 = read_state(input = value_cache_state)[name = string("coreml_update_state_75")]; tensor attention_13_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_13_slice_current_layer_value_cache_begin_0"), val = tensor([13, 0, 0, 0])]; tensor attention_13_slice_current_layer_value_cache_end_0 = const()[name = string("attention_13_slice_current_layer_value_cache_end_0"), val = tensor([14, 2, 512, 64])]; tensor attention_13_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_13_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_13_slice_current_layer_value_cache = slice_by_index(begin = attention_13_slice_current_layer_value_cache_begin_0, end = attention_13_slice_current_layer_value_cache_end_0, squeeze_mask = attention_13_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_27)[name = string("attention_13_slice_current_layer_value_cache")]; int32 attention_13_slice_value_cache_heads_axis_0 = const()[name = string("attention_13_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_13_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_13_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_13_slice_value_cache_heads_0, tensor attention_13_slice_value_cache_heads_1 = split(axis = attention_13_slice_value_cache_heads_axis_0, num_splits = attention_13_slice_value_cache_heads_num_splits_0, x = attention_13_slice_current_layer_value_cache)[name = string("attention_13_slice_value_cache_heads")]; bool attention_13_scores_0_transpose_y_0 = const()[name = string("attention_13_scores_0_transpose_y_0"), val = bool(true)]; bool attention_13_scores_0_transpose_x_0 = const()[name = string("attention_13_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_13_scores_0 = matmul(transpose_x = attention_13_scores_0_transpose_x_0, transpose_y = attention_13_scores_0_transpose_y_0, x = attention_13_key_cache_head_0, y = 
attention_13_q_splits_0)[name = string("attention_13_scores_0")]; fp16 attention_13_scaled_scores_0_y_0 = const()[name = string("attention_13_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_13_scaled_scores_0 = mul(x = attention_13_scores_0, y = attention_13_scaled_scores_0_y_0)[name = string("attention_13_scaled_scores_0")]; tensor attention_13_masked_scaled_scores_0 = add(x = attention_13_scaled_scores_0, y = transpose_0)[name = string("attention_13_masked_scaled_scores_0")]; int32 softmax_26_axis_0 = const()[name = string("softmax_26_axis_0"), val = int32(-2)]; tensor softmax_26 = softmax(axis = softmax_26_axis_0, x = attention_13_masked_scaled_scores_0)[name = string("softmax_26")]; bool attention_13_attention_0_transpose_x_0 = const()[name = string("attention_13_attention_0_transpose_x_0"), val = bool(true)]; bool attention_13_attention_0_transpose_y_0 = const()[name = string("attention_13_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_13_attention_0 = matmul(transpose_x = attention_13_attention_0_transpose_x_0, transpose_y = attention_13_attention_0_transpose_y_0, x = softmax_26, y = attention_13_slice_value_cache_heads_0)[name = string("attention_13_attention_0")]; bool attention_13_scores_1_transpose_y_0 = const()[name = string("attention_13_scores_1_transpose_y_0"), val = bool(true)]; bool attention_13_scores_1_transpose_x_0 = const()[name = string("attention_13_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_13_scores_1 = matmul(transpose_x = attention_13_scores_1_transpose_x_0, transpose_y = attention_13_scores_1_transpose_y_0, x = attention_13_key_cache_head_1, y = attention_13_q_splits_1)[name = string("attention_13_scores_1")]; fp16 attention_13_scaled_scores_1_y_0 = const()[name = string("attention_13_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_13_scaled_scores_1 = mul(x = attention_13_scores_1, y = attention_13_scaled_scores_1_y_0)[name = string("attention_13_scaled_scores_1")]; tensor attention_13_masked_scaled_scores_1 = add(x = attention_13_scaled_scores_1, y = transpose_0)[name = string("attention_13_masked_scaled_scores_1")]; int32 softmax_27_axis_0 = const()[name = string("softmax_27_axis_0"), val = int32(-2)]; tensor softmax_27 = softmax(axis = softmax_27_axis_0, x = attention_13_masked_scaled_scores_1)[name = string("softmax_27")]; bool attention_13_attention_1_transpose_x_0 = const()[name = string("attention_13_attention_1_transpose_x_0"), val = bool(true)]; bool attention_13_attention_1_transpose_y_0 = const()[name = string("attention_13_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_13_attention_1 = matmul(transpose_x = attention_13_attention_1_transpose_x_0, transpose_y = attention_13_attention_1_transpose_y_0, x = softmax_27, y = attention_13_slice_value_cache_heads_1)[name = string("attention_13_attention_1")]; int32 attention_13_concat_attention_all_heads_axis_0 = const()[name = string("attention_13_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_13_concat_attention_all_heads_interleave_0 = const()[name = string("attention_13_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_13_concat_attention_all_heads = concat(axis = attention_13_concat_attention_all_heads_axis_0, interleave = attention_13_concat_attention_all_heads_interleave_0, values = (attention_13_attention_0, attention_13_attention_1))[name = string("attention_13_concat_attention_all_heads")]; tensor attention_13_channels_first_retransposed_perm_0 = const()[name = 
string("attention_13_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_13_reshaped_shape_0 = const()[name = string("attention_13_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_13_channels_first_retransposed = transpose(perm = attention_13_channels_first_retransposed_perm_0, x = attention_13_concat_attention_all_heads)[name = string("transpose_21")]; tensor attention_13_reshaped = reshape(shape = attention_13_reshaped_shape_0, x = attention_13_channels_first_retransposed)[name = string("attention_13_reshaped")]; tensor attention_13_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443007616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443609792))))[name = string("attention_13_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_61 = constexpr_blockwise_shift_scale(data = attention_13_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443638528))))[name = string("constexpr_blockwise_shift_scale_61")]; tensor attention_13_outproj_strides_0 = const()[name = string("attention_13_outproj_strides_0"), val = tensor([1])]; string attention_13_outproj_pad_type_0 = const()[name = string("attention_13_outproj_pad_type_0"), val = string("valid")]; tensor attention_13_outproj_pad_0 = const()[name = string("attention_13_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_13_outproj_dilations_0 = const()[name = string("attention_13_outproj_dilations_0"), val = tensor([1])]; int32 attention_13_outproj_groups_0 = const()[name = string("attention_13_outproj_groups_0"), val = int32(1)]; tensor attention_13_outproj = conv(dilations = attention_13_outproj_dilations_0, groups = attention_13_outproj_groups_0, pad = attention_13_outproj_pad_0, pad_type = attention_13_outproj_pad_type_0, strides = attention_13_outproj_strides_0, weight = constexpr_blockwise_shift_scale_61, x = attention_13_reshaped)[name = string("attention_13_outproj")]; tensor block_13_residual_1 = add(x = block_12_residual_2, y = attention_13_outproj)[name = string("block_13_residual_1")]; tensor block_13_ffn_rmsnorm_abs = abs(x = block_13_residual_1)[name = string("block_13_ffn_rmsnorm_abs")]; tensor block_13_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_13_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_13_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_13_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_13_ffn_rmsnorm_maxval = reduce_max(axes = block_13_ffn_rmsnorm_maxval_axes_0, keep_dims = block_13_ffn_rmsnorm_maxval_keep_dims_0, x = block_13_ffn_rmsnorm_abs)[name = string("block_13_ffn_rmsnorm_maxval")]; fp16 block_13_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_13_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_13_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_13_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_13_ffn_rmsnorm_maxval_clipped = clip(alpha = block_13_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_13_ffn_rmsnorm_maxval_clipped_beta_0, x = block_13_ffn_rmsnorm_maxval)[name = string("block_13_ffn_rmsnorm_maxval_clipped")]; tensor block_13_ffn_rmsnorm_scaled = real_div(x = block_13_residual_1, y = block_13_ffn_rmsnorm_maxval_clipped)[name = string("block_13_ffn_rmsnorm_scaled")]; tensor 
block_13_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_13_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_13_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_13_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_13_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_13_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_13_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_13_ffn_rmsnorm_scaled)[name = string("block_13_ffn_rmsnorm_squared_sum")]; fp16 block_13_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_13_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_13_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_13_ffn_rmsnorm_rsqrt_epsilon_0, x = block_13_ffn_rmsnorm_squared_sum)[name = string("block_13_ffn_rmsnorm_rsqrt")]; fp16 block_13_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_13_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_13_ffn_rmsnorm_dim_scaled = mul(x = block_13_ffn_rmsnorm_scaled, y = block_13_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_13_ffn_rmsnorm_dim_scaled")]; tensor block_13_ffn_rmsnorm_normalized = mul(x = block_13_ffn_rmsnorm_dim_scaled, y = block_13_ffn_rmsnorm_rsqrt)[name = string("block_13_ffn_rmsnorm_normalized")]; tensor block_13_ffn_rmsnorm_y_0 = const()[name = string("block_13_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443640384)))]; tensor block_13_ffn_rmsnorm = mul(x = block_13_ffn_rmsnorm_normalized, y = block_13_ffn_rmsnorm_y_0)[name = string("block_13_ffn_rmsnorm")]; tensor block_13_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443642240))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446910912))))[name = string("block_13_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_62 = constexpr_blockwise_shift_scale(data = block_13_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447066624))))[name = string("constexpr_blockwise_shift_scale_62")]; tensor block_13_ffn_inproj_strides_0 = const()[name = string("block_13_ffn_inproj_strides_0"), val = tensor([1])]; string block_13_ffn_inproj_pad_type_0 = const()[name = string("block_13_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_13_ffn_inproj_pad_0 = const()[name = string("block_13_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_13_ffn_inproj_dilations_0 = const()[name = string("block_13_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_13_ffn_inproj_groups_0 = const()[name = string("block_13_ffn_inproj_groups_0"), val = int32(1)]; tensor block_13_ffn_inproj = conv(dilations = block_13_ffn_inproj_dilations_0, groups = block_13_ffn_inproj_groups_0, pad = block_13_ffn_inproj_pad_0, pad_type = block_13_ffn_inproj_pad_type_0, strides = block_13_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_62, x = block_13_ffn_rmsnorm)[name = string("block_13_ffn_inproj")]; tensor block_13_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(447076416))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(450345088))))[name = string("block_13_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_63 = 
constexpr_blockwise_shift_scale(data = block_13_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(450500800))))[name = string("constexpr_blockwise_shift_scale_63")]; tensor block_13_ffn_g_strides_0 = const()[name = string("block_13_ffn_g_strides_0"), val = tensor([1])]; string block_13_ffn_g_pad_type_0 = const()[name = string("block_13_ffn_g_pad_type_0"), val = string("valid")]; tensor block_13_ffn_g_pad_0 = const()[name = string("block_13_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_13_ffn_g_dilations_0 = const()[name = string("block_13_ffn_g_dilations_0"), val = tensor([1])]; int32 block_13_ffn_g_groups_0 = const()[name = string("block_13_ffn_g_groups_0"), val = int32(1)]; tensor block_13_ffn_g = conv(dilations = block_13_ffn_g_dilations_0, groups = block_13_ffn_g_groups_0, pad = block_13_ffn_g_pad_0, pad_type = block_13_ffn_g_pad_type_0, strides = block_13_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_63, x = block_13_ffn_rmsnorm)[name = string("block_13_ffn_g")]; tensor block_13_ffn_g_activation = silu(x = block_13_ffn_g)[name = string("block_13_ffn_g_activation")]; tensor block_13_ffn_x_gated = mul(x = block_13_ffn_inproj, y = block_13_ffn_g_activation)[name = string("block_13_ffn_x_gated")]; tensor block_13_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(450510592))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453779264))))[name = string("block_13_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_64 = constexpr_blockwise_shift_scale(data = block_13_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453808000))))[name = string("constexpr_blockwise_shift_scale_64")]; tensor block_13_ffn_outproj_strides_0 = const()[name = string("block_13_ffn_outproj_strides_0"), val = tensor([1])]; string block_13_ffn_outproj_pad_type_0 = const()[name = string("block_13_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_13_ffn_outproj_pad_0 = const()[name = string("block_13_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_13_ffn_outproj_dilations_0 = const()[name = string("block_13_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_13_ffn_outproj_groups_0 = const()[name = string("block_13_ffn_outproj_groups_0"), val = int32(1)]; tensor block_13_ffn_outproj = conv(dilations = block_13_ffn_outproj_dilations_0, groups = block_13_ffn_outproj_groups_0, pad = block_13_ffn_outproj_pad_0, pad_type = block_13_ffn_outproj_pad_type_0, strides = block_13_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_64, x = block_13_ffn_x_gated)[name = string("block_13_ffn_outproj")]; tensor block_13_residual_2 = add(x = block_13_ffn_outproj, y = block_13_residual_1)[name = string("block_13_residual_2")]; tensor block_14_attention_rmsnorm_abs = abs(x = block_13_residual_2)[name = string("block_14_attention_rmsnorm_abs")]; tensor block_14_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_14_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_14_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_14_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_14_attention_rmsnorm_maxval = reduce_max(axes = block_14_attention_rmsnorm_maxval_axes_0, keep_dims = block_14_attention_rmsnorm_maxval_keep_dims_0, x = 
block_14_attention_rmsnorm_abs)[name = string("block_14_attention_rmsnorm_maxval")]; fp16 block_14_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_14_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_14_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_14_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_14_attention_rmsnorm_maxval_clipped = clip(alpha = block_14_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_14_attention_rmsnorm_maxval_clipped_beta_0, x = block_14_attention_rmsnorm_maxval)[name = string("block_14_attention_rmsnorm_maxval_clipped")]; tensor block_14_attention_rmsnorm_scaled = real_div(x = block_13_residual_2, y = block_14_attention_rmsnorm_maxval_clipped)[name = string("block_14_attention_rmsnorm_scaled")]; tensor block_14_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_14_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_14_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_14_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_14_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_14_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_14_attention_rmsnorm_squared_sum_keep_dims_0, x = block_14_attention_rmsnorm_scaled)[name = string("block_14_attention_rmsnorm_squared_sum")]; fp16 block_14_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_14_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_14_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_14_attention_rmsnorm_rsqrt_epsilon_0, x = block_14_attention_rmsnorm_squared_sum)[name = string("block_14_attention_rmsnorm_rsqrt")]; fp16 block_14_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_14_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_14_attention_rmsnorm_dim_scaled = mul(x = block_14_attention_rmsnorm_scaled, y = block_14_attention_rmsnorm_dim_scaled_y_0)[name = string("block_14_attention_rmsnorm_dim_scaled")]; tensor block_14_attention_rmsnorm_normalized = mul(x = block_14_attention_rmsnorm_dim_scaled, y = block_14_attention_rmsnorm_rsqrt)[name = string("block_14_attention_rmsnorm_normalized")]; tensor block_14_attention_rmsnorm_y_0 = const()[name = string("block_14_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453809856)))]; tensor block_14_attention_rmsnorm = mul(x = block_14_attention_rmsnorm_normalized, y = block_14_attention_rmsnorm_y_0)[name = string("block_14_attention_rmsnorm")]; tensor attention_14_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(453811712))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454585920))))[name = string("attention_14_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_65 = constexpr_blockwise_shift_scale(data = attention_14_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454622848))))[name = string("constexpr_blockwise_shift_scale_65")]; tensor attention_14_qkvproj_bias_0 = const()[name = string("attention_14_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454625216)))]; tensor attention_14_qkvproj_strides_0 = const()[name = 
string("attention_14_qkvproj_strides_0"), val = tensor([1])]; string attention_14_qkvproj_pad_type_0 = const()[name = string("attention_14_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_14_qkvproj_pad_0 = const()[name = string("attention_14_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_14_qkvproj_dilations_0 = const()[name = string("attention_14_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_14_qkvproj_groups_0 = const()[name = string("attention_14_qkvproj_groups_0"), val = int32(1)]; tensor attention_14_qkvproj = conv(bias = attention_14_qkvproj_bias_0, dilations = attention_14_qkvproj_dilations_0, groups = attention_14_qkvproj_groups_0, pad = attention_14_qkvproj_pad_0, pad_type = attention_14_qkvproj_pad_type_0, strides = attention_14_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_65, x = block_14_attention_rmsnorm)[name = string("attention_14_qkvproj")]; tensor attention_14_head_reshape_shape_0 = const()[name = string("attention_14_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_14_head_reshape = reshape(shape = attention_14_head_reshape_shape_0, x = attention_14_qkvproj)[name = string("attention_14_head_reshape")]; tensor attention_14_head_transpose_perm_0 = const()[name = string("attention_14_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_14_split_qkv_heads_axis_0 = const()[name = string("attention_14_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_14_split_qkv_heads_split_sizes_0 = const()[name = string("attention_14_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_14_head_transpose = transpose(perm = attention_14_head_transpose_perm_0, x = attention_14_head_reshape)[name = string("transpose_20")]; tensor attention_14_split_qkv_heads_0, tensor attention_14_split_qkv_heads_1, tensor attention_14_split_qkv_heads_2 = split(axis = attention_14_split_qkv_heads_axis_0, split_sizes = attention_14_split_qkv_heads_split_sizes_0, x = attention_14_head_transpose)[name = string("attention_14_split_qkv_heads")]; tensor attention_14_q_rope_lhs_mult = mul(x = attention_14_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_14_q_rope_lhs_mult")]; int32 attention_14_q_rotate_half_split_num_splits_0 = const()[name = string("attention_14_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_14_q_rotate_half_split_axis_0 = const()[name = string("attention_14_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_14_q_rotate_half_split_0, tensor attention_14_q_rotate_half_split_1 = split(axis = attention_14_q_rotate_half_split_axis_0, num_splits = attention_14_q_rotate_half_split_num_splits_0, x = attention_14_split_qkv_heads_0)[name = string("attention_14_q_rotate_half_split")]; fp16 attention_14_q_rotate_half_neg_y_0 = const()[name = string("attention_14_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_14_q_rotate_half_neg = mul(x = attention_14_q_rotate_half_split_1, y = attention_14_q_rotate_half_neg_y_0)[name = string("attention_14_q_rotate_half_neg")]; int32 attention_14_q_rotate_half_concat_axis_0 = const()[name = string("attention_14_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_14_q_rotate_half_concat_interleave_0 = const()[name = string("attention_14_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_14_q_rotate_half_concat = concat(axis = attention_14_q_rotate_half_concat_axis_0, interleave = attention_14_q_rotate_half_concat_interleave_0, values = 
(attention_14_q_rotate_half_neg, attention_14_q_rotate_half_split_0))[name = string("attention_14_q_rotate_half_concat")]; tensor attention_14_q_rope_rhs_mult = mul(x = attention_14_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_14_q_rope_rhs_mult")]; tensor attention_14_q_rope = add(x = attention_14_q_rope_lhs_mult, y = attention_14_q_rope_rhs_mult)[name = string("attention_14_q_rope")]; tensor attention_14_k_rope_lhs_mult = mul(x = attention_14_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_14_k_rope_lhs_mult")]; int32 attention_14_k_rotate_half_split_num_splits_0 = const()[name = string("attention_14_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_14_k_rotate_half_split_axis_0 = const()[name = string("attention_14_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_14_k_rotate_half_split_0, tensor attention_14_k_rotate_half_split_1 = split(axis = attention_14_k_rotate_half_split_axis_0, num_splits = attention_14_k_rotate_half_split_num_splits_0, x = attention_14_split_qkv_heads_1)[name = string("attention_14_k_rotate_half_split")]; fp16 attention_14_k_rotate_half_neg_y_0 = const()[name = string("attention_14_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_14_k_rotate_half_neg = mul(x = attention_14_k_rotate_half_split_1, y = attention_14_k_rotate_half_neg_y_0)[name = string("attention_14_k_rotate_half_neg")]; int32 attention_14_k_rotate_half_concat_axis_0 = const()[name = string("attention_14_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_14_k_rotate_half_concat_interleave_0 = const()[name = string("attention_14_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_14_k_rotate_half_concat = concat(axis = attention_14_k_rotate_half_concat_axis_0, interleave = attention_14_k_rotate_half_concat_interleave_0, values = (attention_14_k_rotate_half_neg, attention_14_k_rotate_half_split_0))[name = string("attention_14_k_rotate_half_concat")]; tensor attention_14_k_rope_rhs_mult = mul(x = attention_14_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_14_k_rope_rhs_mult")]; tensor attention_14_k_rope = add(x = attention_14_k_rope_lhs_mult, y = attention_14_k_rope_rhs_mult)[name = string("attention_14_k_rope")]; int32 attention_14_q_splits_axis_0 = const()[name = string("attention_14_q_splits_axis_0"), val = int32(1)]; int32 attention_14_q_splits_num_splits_0 = const()[name = string("attention_14_q_splits_num_splits_0"), val = int32(2)]; tensor attention_14_q_splits_0, tensor attention_14_q_splits_1 = split(axis = attention_14_q_splits_axis_0, num_splits = attention_14_q_splits_num_splits_0, x = attention_14_q_rope)[name = string("attention_14_q_splits")]; tensor attention_14_update_begin_0_values0_0 = const()[name = string("attention_14_update_begin_0_values0_0"), val = tensor([14])]; tensor attention_14_update_begin_0_values1_0 = const()[name = string("attention_14_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_14_update_begin_0_values3_0 = const()[name = string("attention_14_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_14_update_begin_0_axis_0 = const()[name = string("attention_14_update_begin_0_axis_0"), val = int32(0)]; bool attention_14_update_begin_0_interleave_0 = const()[name = string("attention_14_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_14_update_begin_0 = concat(axis = attention_14_update_begin_0_axis_0, interleave = attention_14_update_begin_0_interleave_0, values = 
(attention_14_update_begin_0_values0_0, attention_14_update_begin_0_values1_0, query_pos1, attention_14_update_begin_0_values3_0))[name = string("attention_14_update_begin_0")]; tensor attention_14_update_end_0_values0_0 = const()[name = string("attention_14_update_end_0_values0_0"), val = tensor([15])]; tensor attention_14_update_end_0_values1_0 = const()[name = string("attention_14_update_end_0_values1_0"), val = tensor([2])]; tensor attention_14_update_end_0_values3_0 = const()[name = string("attention_14_update_end_0_values3_0"), val = tensor([64])]; int32 attention_14_update_end_0_axis_0 = const()[name = string("attention_14_update_end_0_axis_0"), val = int32(0)]; bool attention_14_update_end_0_interleave_0 = const()[name = string("attention_14_update_end_0_interleave_0"), val = bool(false)]; tensor attention_14_update_end_0 = concat(axis = attention_14_update_end_0_axis_0, interleave = attention_14_update_end_0_interleave_0, values = (attention_14_update_end_0_values0_0, attention_14_update_end_0_values1_0, end_pos_0, attention_14_update_end_0_values3_0))[name = string("attention_14_update_end_0")]; tensor attention_14_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_14_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_14_updated_key_cache_0 = slice_update(begin = attention_14_update_begin_0, end = attention_14_update_end_0, squeeze_mask = attention_14_updated_key_cache_0_squeeze_mask_0, update = attention_14_k_rope, x = coreml_update_state_26)[name = string("attention_14_updated_key_cache_0")]; write_state(data = attention_14_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_76_write_state")]; tensor coreml_update_state_28 = read_state(input = key_cache_state)[name = string("coreml_update_state_76")]; tensor attention_14_key_cache_begin_0 = const()[name = string("attention_14_key_cache_begin_0"), val = tensor([14, 0, 0, 0])]; tensor attention_14_key_cache_end_0 = const()[name = string("attention_14_key_cache_end_0"), val = tensor([15, 2, 512, 64])]; tensor attention_14_key_cache_squeeze_mask_0 = const()[name = string("attention_14_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_14_key_cache = slice_by_index(begin = attention_14_key_cache_begin_0, end = attention_14_key_cache_end_0, squeeze_mask = attention_14_key_cache_squeeze_mask_0, x = coreml_update_state_28)[name = string("attention_14_key_cache")]; int32 attention_14_key_cache_head_axis_0 = const()[name = string("attention_14_key_cache_head_axis_0"), val = int32(1)]; int32 attention_14_key_cache_head_num_splits_0 = const()[name = string("attention_14_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_14_key_cache_head_0, tensor attention_14_key_cache_head_1 = split(axis = attention_14_key_cache_head_axis_0, num_splits = attention_14_key_cache_head_num_splits_0, x = attention_14_key_cache)[name = string("attention_14_key_cache_head")]; tensor attention_14_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_14_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_14_updated_value_cache_0 = slice_update(begin = attention_14_update_begin_0, end = attention_14_update_end_0, squeeze_mask = attention_14_updated_value_cache_0_squeeze_mask_0, update = attention_14_split_qkv_heads_2, x = coreml_update_state_27)[name = string("attention_14_updated_value_cache_0")]; write_state(data = 
attention_14_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_77_write_state")]; tensor coreml_update_state_29 = read_state(input = value_cache_state)[name = string("coreml_update_state_77")]; tensor attention_14_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_14_slice_current_layer_value_cache_begin_0"), val = tensor([14, 0, 0, 0])]; tensor attention_14_slice_current_layer_value_cache_end_0 = const()[name = string("attention_14_slice_current_layer_value_cache_end_0"), val = tensor([15, 2, 512, 64])]; tensor attention_14_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_14_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_14_slice_current_layer_value_cache = slice_by_index(begin = attention_14_slice_current_layer_value_cache_begin_0, end = attention_14_slice_current_layer_value_cache_end_0, squeeze_mask = attention_14_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_29)[name = string("attention_14_slice_current_layer_value_cache")]; int32 attention_14_slice_value_cache_heads_axis_0 = const()[name = string("attention_14_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_14_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_14_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_14_slice_value_cache_heads_0, tensor attention_14_slice_value_cache_heads_1 = split(axis = attention_14_slice_value_cache_heads_axis_0, num_splits = attention_14_slice_value_cache_heads_num_splits_0, x = attention_14_slice_current_layer_value_cache)[name = string("attention_14_slice_value_cache_heads")]; bool attention_14_scores_0_transpose_y_0 = const()[name = string("attention_14_scores_0_transpose_y_0"), val = bool(true)]; bool attention_14_scores_0_transpose_x_0 = const()[name = string("attention_14_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_14_scores_0 = matmul(transpose_x = attention_14_scores_0_transpose_x_0, transpose_y = attention_14_scores_0_transpose_y_0, x = attention_14_key_cache_head_0, y = attention_14_q_splits_0)[name = string("attention_14_scores_0")]; fp16 attention_14_scaled_scores_0_y_0 = const()[name = string("attention_14_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_14_scaled_scores_0 = mul(x = attention_14_scores_0, y = attention_14_scaled_scores_0_y_0)[name = string("attention_14_scaled_scores_0")]; tensor attention_14_masked_scaled_scores_0 = add(x = attention_14_scaled_scores_0, y = transpose_0)[name = string("attention_14_masked_scaled_scores_0")]; int32 softmax_28_axis_0 = const()[name = string("softmax_28_axis_0"), val = int32(-2)]; tensor softmax_28 = softmax(axis = softmax_28_axis_0, x = attention_14_masked_scaled_scores_0)[name = string("softmax_28")]; bool attention_14_attention_0_transpose_x_0 = const()[name = string("attention_14_attention_0_transpose_x_0"), val = bool(true)]; bool attention_14_attention_0_transpose_y_0 = const()[name = string("attention_14_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_14_attention_0 = matmul(transpose_x = attention_14_attention_0_transpose_x_0, transpose_y = attention_14_attention_0_transpose_y_0, x = softmax_28, y = attention_14_slice_value_cache_heads_0)[name = string("attention_14_attention_0")]; bool attention_14_scores_1_transpose_y_0 = const()[name = string("attention_14_scores_1_transpose_y_0"), val = bool(true)]; bool 
attention_14_scores_1_transpose_x_0 = const()[name = string("attention_14_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_14_scores_1 = matmul(transpose_x = attention_14_scores_1_transpose_x_0, transpose_y = attention_14_scores_1_transpose_y_0, x = attention_14_key_cache_head_1, y = attention_14_q_splits_1)[name = string("attention_14_scores_1")]; fp16 attention_14_scaled_scores_1_y_0 = const()[name = string("attention_14_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_14_scaled_scores_1 = mul(x = attention_14_scores_1, y = attention_14_scaled_scores_1_y_0)[name = string("attention_14_scaled_scores_1")]; tensor attention_14_masked_scaled_scores_1 = add(x = attention_14_scaled_scores_1, y = transpose_0)[name = string("attention_14_masked_scaled_scores_1")]; int32 softmax_29_axis_0 = const()[name = string("softmax_29_axis_0"), val = int32(-2)]; tensor softmax_29 = softmax(axis = softmax_29_axis_0, x = attention_14_masked_scaled_scores_1)[name = string("softmax_29")]; bool attention_14_attention_1_transpose_x_0 = const()[name = string("attention_14_attention_1_transpose_x_0"), val = bool(true)]; bool attention_14_attention_1_transpose_y_0 = const()[name = string("attention_14_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_14_attention_1 = matmul(transpose_x = attention_14_attention_1_transpose_x_0, transpose_y = attention_14_attention_1_transpose_y_0, x = softmax_29, y = attention_14_slice_value_cache_heads_1)[name = string("attention_14_attention_1")]; int32 attention_14_concat_attention_all_heads_axis_0 = const()[name = string("attention_14_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_14_concat_attention_all_heads_interleave_0 = const()[name = string("attention_14_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_14_concat_attention_all_heads = concat(axis = attention_14_concat_attention_all_heads_axis_0, interleave = attention_14_concat_attention_all_heads_interleave_0, values = (attention_14_attention_0, attention_14_attention_1))[name = string("attention_14_concat_attention_all_heads")]; tensor attention_14_channels_first_retransposed_perm_0 = const()[name = string("attention_14_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_14_reshaped_shape_0 = const()[name = string("attention_14_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_14_channels_first_retransposed = transpose(perm = attention_14_channels_first_retransposed_perm_0, x = attention_14_concat_attention_all_heads)[name = string("transpose_19")]; tensor attention_14_reshaped = reshape(shape = attention_14_reshaped_shape_0, x = attention_14_channels_first_retransposed)[name = string("attention_14_reshaped")]; tensor attention_14_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454627584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455229760))))[name = string("attention_14_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_66 = constexpr_blockwise_shift_scale(data = attention_14_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455258496))))[name = string("constexpr_blockwise_shift_scale_66")]; tensor attention_14_outproj_strides_0 = const()[name = string("attention_14_outproj_strides_0"), val = tensor([1])]; string attention_14_outproj_pad_type_0 = 
const()[name = string("attention_14_outproj_pad_type_0"), val = string("valid")]; tensor attention_14_outproj_pad_0 = const()[name = string("attention_14_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_14_outproj_dilations_0 = const()[name = string("attention_14_outproj_dilations_0"), val = tensor([1])]; int32 attention_14_outproj_groups_0 = const()[name = string("attention_14_outproj_groups_0"), val = int32(1)]; tensor attention_14_outproj = conv(dilations = attention_14_outproj_dilations_0, groups = attention_14_outproj_groups_0, pad = attention_14_outproj_pad_0, pad_type = attention_14_outproj_pad_type_0, strides = attention_14_outproj_strides_0, weight = constexpr_blockwise_shift_scale_66, x = attention_14_reshaped)[name = string("attention_14_outproj")]; tensor block_14_residual_1 = add(x = block_13_residual_2, y = attention_14_outproj)[name = string("block_14_residual_1")]; tensor block_14_ffn_rmsnorm_abs = abs(x = block_14_residual_1)[name = string("block_14_ffn_rmsnorm_abs")]; tensor block_14_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_14_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_14_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_14_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_14_ffn_rmsnorm_maxval = reduce_max(axes = block_14_ffn_rmsnorm_maxval_axes_0, keep_dims = block_14_ffn_rmsnorm_maxval_keep_dims_0, x = block_14_ffn_rmsnorm_abs)[name = string("block_14_ffn_rmsnorm_maxval")]; fp16 block_14_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_14_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_14_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_14_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_14_ffn_rmsnorm_maxval_clipped = clip(alpha = block_14_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_14_ffn_rmsnorm_maxval_clipped_beta_0, x = block_14_ffn_rmsnorm_maxval)[name = string("block_14_ffn_rmsnorm_maxval_clipped")]; tensor block_14_ffn_rmsnorm_scaled = real_div(x = block_14_residual_1, y = block_14_ffn_rmsnorm_maxval_clipped)[name = string("block_14_ffn_rmsnorm_scaled")]; tensor block_14_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_14_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_14_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_14_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_14_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_14_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_14_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_14_ffn_rmsnorm_scaled)[name = string("block_14_ffn_rmsnorm_squared_sum")]; fp16 block_14_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_14_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_14_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_14_ffn_rmsnorm_rsqrt_epsilon_0, x = block_14_ffn_rmsnorm_squared_sum)[name = string("block_14_ffn_rmsnorm_rsqrt")]; fp16 block_14_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_14_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_14_ffn_rmsnorm_dim_scaled = mul(x = block_14_ffn_rmsnorm_scaled, y = block_14_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_14_ffn_rmsnorm_dim_scaled")]; tensor block_14_ffn_rmsnorm_normalized = mul(x = block_14_ffn_rmsnorm_dim_scaled, y = block_14_ffn_rmsnorm_rsqrt)[name = string("block_14_ffn_rmsnorm_normalized")]; tensor block_14_ffn_rmsnorm_y_0 = const()[name = string("block_14_ffn_rmsnorm_y_0"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455260352)))]; tensor block_14_ffn_rmsnorm = mul(x = block_14_ffn_rmsnorm_normalized, y = block_14_ffn_rmsnorm_y_0)[name = string("block_14_ffn_rmsnorm")]; tensor block_14_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455262208))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458530880))))[name = string("block_14_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_67 = constexpr_blockwise_shift_scale(data = block_14_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458686592))))[name = string("constexpr_blockwise_shift_scale_67")]; tensor block_14_ffn_inproj_strides_0 = const()[name = string("block_14_ffn_inproj_strides_0"), val = tensor([1])]; string block_14_ffn_inproj_pad_type_0 = const()[name = string("block_14_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_14_ffn_inproj_pad_0 = const()[name = string("block_14_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_14_ffn_inproj_dilations_0 = const()[name = string("block_14_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_14_ffn_inproj_groups_0 = const()[name = string("block_14_ffn_inproj_groups_0"), val = int32(1)]; tensor block_14_ffn_inproj = conv(dilations = block_14_ffn_inproj_dilations_0, groups = block_14_ffn_inproj_groups_0, pad = block_14_ffn_inproj_pad_0, pad_type = block_14_ffn_inproj_pad_type_0, strides = block_14_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_67, x = block_14_ffn_rmsnorm)[name = string("block_14_ffn_inproj")]; tensor block_14_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458696384))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461965056))))[name = string("block_14_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_68 = constexpr_blockwise_shift_scale(data = block_14_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462120768))))[name = string("constexpr_blockwise_shift_scale_68")]; tensor block_14_ffn_g_strides_0 = const()[name = string("block_14_ffn_g_strides_0"), val = tensor([1])]; string block_14_ffn_g_pad_type_0 = const()[name = string("block_14_ffn_g_pad_type_0"), val = string("valid")]; tensor block_14_ffn_g_pad_0 = const()[name = string("block_14_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_14_ffn_g_dilations_0 = const()[name = string("block_14_ffn_g_dilations_0"), val = tensor([1])]; int32 block_14_ffn_g_groups_0 = const()[name = string("block_14_ffn_g_groups_0"), val = int32(1)]; tensor block_14_ffn_g = conv(dilations = block_14_ffn_g_dilations_0, groups = block_14_ffn_g_groups_0, pad = block_14_ffn_g_pad_0, pad_type = block_14_ffn_g_pad_type_0, strides = block_14_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_68, x = block_14_ffn_rmsnorm)[name = string("block_14_ffn_g")]; tensor block_14_ffn_g_activation = silu(x = block_14_ffn_g)[name = string("block_14_ffn_g_activation")]; tensor block_14_ffn_x_gated = mul(x = block_14_ffn_inproj, y = block_14_ffn_g_activation)[name = string("block_14_ffn_x_gated")]; tensor block_14_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462130560))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465399232))))[name = string("block_14_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_69 = constexpr_blockwise_shift_scale(data = block_14_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465427968))))[name = string("constexpr_blockwise_shift_scale_69")]; tensor block_14_ffn_outproj_strides_0 = const()[name = string("block_14_ffn_outproj_strides_0"), val = tensor([1])]; string block_14_ffn_outproj_pad_type_0 = const()[name = string("block_14_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_14_ffn_outproj_pad_0 = const()[name = string("block_14_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_14_ffn_outproj_dilations_0 = const()[name = string("block_14_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_14_ffn_outproj_groups_0 = const()[name = string("block_14_ffn_outproj_groups_0"), val = int32(1)]; tensor block_14_ffn_outproj = conv(dilations = block_14_ffn_outproj_dilations_0, groups = block_14_ffn_outproj_groups_0, pad = block_14_ffn_outproj_pad_0, pad_type = block_14_ffn_outproj_pad_type_0, strides = block_14_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_69, x = block_14_ffn_x_gated)[name = string("block_14_ffn_outproj")]; tensor block_14_residual_2 = add(x = block_14_ffn_outproj, y = block_14_residual_1)[name = string("block_14_residual_2")]; tensor block_15_attention_rmsnorm_abs = abs(x = block_14_residual_2)[name = string("block_15_attention_rmsnorm_abs")]; tensor block_15_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_15_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_15_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_15_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_15_attention_rmsnorm_maxval = reduce_max(axes = block_15_attention_rmsnorm_maxval_axes_0, keep_dims = block_15_attention_rmsnorm_maxval_keep_dims_0, x = block_15_attention_rmsnorm_abs)[name = string("block_15_attention_rmsnorm_maxval")]; fp16 block_15_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_15_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_15_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_15_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_15_attention_rmsnorm_maxval_clipped = clip(alpha = block_15_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_15_attention_rmsnorm_maxval_clipped_beta_0, x = block_15_attention_rmsnorm_maxval)[name = string("block_15_attention_rmsnorm_maxval_clipped")]; tensor block_15_attention_rmsnorm_scaled = real_div(x = block_14_residual_2, y = block_15_attention_rmsnorm_maxval_clipped)[name = string("block_15_attention_rmsnorm_scaled")]; tensor block_15_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_15_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_15_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_15_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_15_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_15_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_15_attention_rmsnorm_squared_sum_keep_dims_0, x = block_15_attention_rmsnorm_scaled)[name = 
string("block_15_attention_rmsnorm_squared_sum")]; fp16 block_15_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_15_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_15_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_15_attention_rmsnorm_rsqrt_epsilon_0, x = block_15_attention_rmsnorm_squared_sum)[name = string("block_15_attention_rmsnorm_rsqrt")]; fp16 block_15_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_15_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_15_attention_rmsnorm_dim_scaled = mul(x = block_15_attention_rmsnorm_scaled, y = block_15_attention_rmsnorm_dim_scaled_y_0)[name = string("block_15_attention_rmsnorm_dim_scaled")]; tensor block_15_attention_rmsnorm_normalized = mul(x = block_15_attention_rmsnorm_dim_scaled, y = block_15_attention_rmsnorm_rsqrt)[name = string("block_15_attention_rmsnorm_normalized")]; tensor block_15_attention_rmsnorm_y_0 = const()[name = string("block_15_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465429824)))]; tensor block_15_attention_rmsnorm = mul(x = block_15_attention_rmsnorm_normalized, y = block_15_attention_rmsnorm_y_0)[name = string("block_15_attention_rmsnorm")]; tensor attention_15_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(465431680))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466205888))))[name = string("attention_15_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_70 = constexpr_blockwise_shift_scale(data = attention_15_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466242816))))[name = string("constexpr_blockwise_shift_scale_70")]; tensor attention_15_qkvproj_bias_0 = const()[name = string("attention_15_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466245184)))]; tensor attention_15_qkvproj_strides_0 = const()[name = string("attention_15_qkvproj_strides_0"), val = tensor([1])]; string attention_15_qkvproj_pad_type_0 = const()[name = string("attention_15_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_15_qkvproj_pad_0 = const()[name = string("attention_15_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_15_qkvproj_dilations_0 = const()[name = string("attention_15_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_15_qkvproj_groups_0 = const()[name = string("attention_15_qkvproj_groups_0"), val = int32(1)]; tensor attention_15_qkvproj = conv(bias = attention_15_qkvproj_bias_0, dilations = attention_15_qkvproj_dilations_0, groups = attention_15_qkvproj_groups_0, pad = attention_15_qkvproj_pad_0, pad_type = attention_15_qkvproj_pad_type_0, strides = attention_15_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_70, x = block_15_attention_rmsnorm)[name = string("attention_15_qkvproj")]; tensor attention_15_head_reshape_shape_0 = const()[name = string("attention_15_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_15_head_reshape = reshape(shape = attention_15_head_reshape_shape_0, x = attention_15_qkvproj)[name = string("attention_15_head_reshape")]; tensor attention_15_head_transpose_perm_0 = const()[name = string("attention_15_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_15_split_qkv_heads_axis_0 = 
const()[name = string("attention_15_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_15_split_qkv_heads_split_sizes_0 = const()[name = string("attention_15_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_15_head_transpose = transpose(perm = attention_15_head_transpose_perm_0, x = attention_15_head_reshape)[name = string("transpose_18")]; tensor attention_15_split_qkv_heads_0, tensor attention_15_split_qkv_heads_1, tensor attention_15_split_qkv_heads_2 = split(axis = attention_15_split_qkv_heads_axis_0, split_sizes = attention_15_split_qkv_heads_split_sizes_0, x = attention_15_head_transpose)[name = string("attention_15_split_qkv_heads")]; tensor attention_15_q_rope_lhs_mult = mul(x = attention_15_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_15_q_rope_lhs_mult")]; int32 attention_15_q_rotate_half_split_num_splits_0 = const()[name = string("attention_15_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_15_q_rotate_half_split_axis_0 = const()[name = string("attention_15_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_15_q_rotate_half_split_0, tensor attention_15_q_rotate_half_split_1 = split(axis = attention_15_q_rotate_half_split_axis_0, num_splits = attention_15_q_rotate_half_split_num_splits_0, x = attention_15_split_qkv_heads_0)[name = string("attention_15_q_rotate_half_split")]; fp16 attention_15_q_rotate_half_neg_y_0 = const()[name = string("attention_15_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_15_q_rotate_half_neg = mul(x = attention_15_q_rotate_half_split_1, y = attention_15_q_rotate_half_neg_y_0)[name = string("attention_15_q_rotate_half_neg")]; int32 attention_15_q_rotate_half_concat_axis_0 = const()[name = string("attention_15_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_15_q_rotate_half_concat_interleave_0 = const()[name = string("attention_15_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_15_q_rotate_half_concat = concat(axis = attention_15_q_rotate_half_concat_axis_0, interleave = attention_15_q_rotate_half_concat_interleave_0, values = (attention_15_q_rotate_half_neg, attention_15_q_rotate_half_split_0))[name = string("attention_15_q_rotate_half_concat")]; tensor attention_15_q_rope_rhs_mult = mul(x = attention_15_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_15_q_rope_rhs_mult")]; tensor attention_15_q_rope = add(x = attention_15_q_rope_lhs_mult, y = attention_15_q_rope_rhs_mult)[name = string("attention_15_q_rope")]; tensor attention_15_k_rope_lhs_mult = mul(x = attention_15_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_15_k_rope_lhs_mult")]; int32 attention_15_k_rotate_half_split_num_splits_0 = const()[name = string("attention_15_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_15_k_rotate_half_split_axis_0 = const()[name = string("attention_15_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_15_k_rotate_half_split_0, tensor attention_15_k_rotate_half_split_1 = split(axis = attention_15_k_rotate_half_split_axis_0, num_splits = attention_15_k_rotate_half_split_num_splits_0, x = attention_15_split_qkv_heads_1)[name = string("attention_15_k_rotate_half_split")]; fp16 attention_15_k_rotate_half_neg_y_0 = const()[name = string("attention_15_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_15_k_rotate_half_neg = mul(x = attention_15_k_rotate_half_split_1, y = attention_15_k_rotate_half_neg_y_0)[name = 
string("attention_15_k_rotate_half_neg")]; int32 attention_15_k_rotate_half_concat_axis_0 = const()[name = string("attention_15_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_15_k_rotate_half_concat_interleave_0 = const()[name = string("attention_15_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_15_k_rotate_half_concat = concat(axis = attention_15_k_rotate_half_concat_axis_0, interleave = attention_15_k_rotate_half_concat_interleave_0, values = (attention_15_k_rotate_half_neg, attention_15_k_rotate_half_split_0))[name = string("attention_15_k_rotate_half_concat")]; tensor attention_15_k_rope_rhs_mult = mul(x = attention_15_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_15_k_rope_rhs_mult")]; tensor attention_15_k_rope = add(x = attention_15_k_rope_lhs_mult, y = attention_15_k_rope_rhs_mult)[name = string("attention_15_k_rope")]; int32 attention_15_q_splits_axis_0 = const()[name = string("attention_15_q_splits_axis_0"), val = int32(1)]; int32 attention_15_q_splits_num_splits_0 = const()[name = string("attention_15_q_splits_num_splits_0"), val = int32(2)]; tensor attention_15_q_splits_0, tensor attention_15_q_splits_1 = split(axis = attention_15_q_splits_axis_0, num_splits = attention_15_q_splits_num_splits_0, x = attention_15_q_rope)[name = string("attention_15_q_splits")]; tensor attention_15_update_begin_0_values0_0 = const()[name = string("attention_15_update_begin_0_values0_0"), val = tensor([15])]; tensor attention_15_update_begin_0_values1_0 = const()[name = string("attention_15_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_15_update_begin_0_values3_0 = const()[name = string("attention_15_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_15_update_begin_0_axis_0 = const()[name = string("attention_15_update_begin_0_axis_0"), val = int32(0)]; bool attention_15_update_begin_0_interleave_0 = const()[name = string("attention_15_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_15_update_begin_0 = concat(axis = attention_15_update_begin_0_axis_0, interleave = attention_15_update_begin_0_interleave_0, values = (attention_15_update_begin_0_values0_0, attention_15_update_begin_0_values1_0, query_pos1, attention_15_update_begin_0_values3_0))[name = string("attention_15_update_begin_0")]; tensor attention_15_update_end_0_values0_0 = const()[name = string("attention_15_update_end_0_values0_0"), val = tensor([16])]; tensor attention_15_update_end_0_values1_0 = const()[name = string("attention_15_update_end_0_values1_0"), val = tensor([2])]; tensor attention_15_update_end_0_values3_0 = const()[name = string("attention_15_update_end_0_values3_0"), val = tensor([64])]; int32 attention_15_update_end_0_axis_0 = const()[name = string("attention_15_update_end_0_axis_0"), val = int32(0)]; bool attention_15_update_end_0_interleave_0 = const()[name = string("attention_15_update_end_0_interleave_0"), val = bool(false)]; tensor attention_15_update_end_0 = concat(axis = attention_15_update_end_0_axis_0, interleave = attention_15_update_end_0_interleave_0, values = (attention_15_update_end_0_values0_0, attention_15_update_end_0_values1_0, end_pos_0, attention_15_update_end_0_values3_0))[name = string("attention_15_update_end_0")]; tensor attention_15_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_15_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_15_updated_key_cache_0 = slice_update(begin = attention_15_update_begin_0, end = 
attention_15_update_end_0, squeeze_mask = attention_15_updated_key_cache_0_squeeze_mask_0, update = attention_15_k_rope, x = coreml_update_state_28)[name = string("attention_15_updated_key_cache_0")]; write_state(data = attention_15_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_78_write_state")]; tensor coreml_update_state_30 = read_state(input = key_cache_state)[name = string("coreml_update_state_78")]; tensor attention_15_key_cache_begin_0 = const()[name = string("attention_15_key_cache_begin_0"), val = tensor([15, 0, 0, 0])]; tensor attention_15_key_cache_end_0 = const()[name = string("attention_15_key_cache_end_0"), val = tensor([16, 2, 512, 64])]; tensor attention_15_key_cache_squeeze_mask_0 = const()[name = string("attention_15_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_15_key_cache = slice_by_index(begin = attention_15_key_cache_begin_0, end = attention_15_key_cache_end_0, squeeze_mask = attention_15_key_cache_squeeze_mask_0, x = coreml_update_state_30)[name = string("attention_15_key_cache")]; int32 attention_15_key_cache_head_axis_0 = const()[name = string("attention_15_key_cache_head_axis_0"), val = int32(1)]; int32 attention_15_key_cache_head_num_splits_0 = const()[name = string("attention_15_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_15_key_cache_head_0, tensor attention_15_key_cache_head_1 = split(axis = attention_15_key_cache_head_axis_0, num_splits = attention_15_key_cache_head_num_splits_0, x = attention_15_key_cache)[name = string("attention_15_key_cache_head")]; tensor attention_15_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_15_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_15_updated_value_cache_0 = slice_update(begin = attention_15_update_begin_0, end = attention_15_update_end_0, squeeze_mask = attention_15_updated_value_cache_0_squeeze_mask_0, update = attention_15_split_qkv_heads_2, x = coreml_update_state_29)[name = string("attention_15_updated_value_cache_0")]; write_state(data = attention_15_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_79_write_state")]; tensor coreml_update_state_31 = read_state(input = value_cache_state)[name = string("coreml_update_state_79")]; tensor attention_15_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_15_slice_current_layer_value_cache_begin_0"), val = tensor([15, 0, 0, 0])]; tensor attention_15_slice_current_layer_value_cache_end_0 = const()[name = string("attention_15_slice_current_layer_value_cache_end_0"), val = tensor([16, 2, 512, 64])]; tensor attention_15_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_15_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_15_slice_current_layer_value_cache = slice_by_index(begin = attention_15_slice_current_layer_value_cache_begin_0, end = attention_15_slice_current_layer_value_cache_end_0, squeeze_mask = attention_15_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_31)[name = string("attention_15_slice_current_layer_value_cache")]; int32 attention_15_slice_value_cache_heads_axis_0 = const()[name = string("attention_15_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_15_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_15_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor 
attention_15_slice_value_cache_heads_0, tensor attention_15_slice_value_cache_heads_1 = split(axis = attention_15_slice_value_cache_heads_axis_0, num_splits = attention_15_slice_value_cache_heads_num_splits_0, x = attention_15_slice_current_layer_value_cache)[name = string("attention_15_slice_value_cache_heads")]; bool attention_15_scores_0_transpose_y_0 = const()[name = string("attention_15_scores_0_transpose_y_0"), val = bool(true)]; bool attention_15_scores_0_transpose_x_0 = const()[name = string("attention_15_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_15_scores_0 = matmul(transpose_x = attention_15_scores_0_transpose_x_0, transpose_y = attention_15_scores_0_transpose_y_0, x = attention_15_key_cache_head_0, y = attention_15_q_splits_0)[name = string("attention_15_scores_0")]; fp16 attention_15_scaled_scores_0_y_0 = const()[name = string("attention_15_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_15_scaled_scores_0 = mul(x = attention_15_scores_0, y = attention_15_scaled_scores_0_y_0)[name = string("attention_15_scaled_scores_0")]; tensor attention_15_masked_scaled_scores_0 = add(x = attention_15_scaled_scores_0, y = transpose_0)[name = string("attention_15_masked_scaled_scores_0")]; int32 softmax_30_axis_0 = const()[name = string("softmax_30_axis_0"), val = int32(-2)]; tensor softmax_30 = softmax(axis = softmax_30_axis_0, x = attention_15_masked_scaled_scores_0)[name = string("softmax_30")]; bool attention_15_attention_0_transpose_x_0 = const()[name = string("attention_15_attention_0_transpose_x_0"), val = bool(true)]; bool attention_15_attention_0_transpose_y_0 = const()[name = string("attention_15_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_15_attention_0 = matmul(transpose_x = attention_15_attention_0_transpose_x_0, transpose_y = attention_15_attention_0_transpose_y_0, x = softmax_30, y = attention_15_slice_value_cache_heads_0)[name = string("attention_15_attention_0")]; bool attention_15_scores_1_transpose_y_0 = const()[name = string("attention_15_scores_1_transpose_y_0"), val = bool(true)]; bool attention_15_scores_1_transpose_x_0 = const()[name = string("attention_15_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_15_scores_1 = matmul(transpose_x = attention_15_scores_1_transpose_x_0, transpose_y = attention_15_scores_1_transpose_y_0, x = attention_15_key_cache_head_1, y = attention_15_q_splits_1)[name = string("attention_15_scores_1")]; fp16 attention_15_scaled_scores_1_y_0 = const()[name = string("attention_15_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_15_scaled_scores_1 = mul(x = attention_15_scores_1, y = attention_15_scaled_scores_1_y_0)[name = string("attention_15_scaled_scores_1")]; tensor attention_15_masked_scaled_scores_1 = add(x = attention_15_scaled_scores_1, y = transpose_0)[name = string("attention_15_masked_scaled_scores_1")]; int32 softmax_31_axis_0 = const()[name = string("softmax_31_axis_0"), val = int32(-2)]; tensor softmax_31 = softmax(axis = softmax_31_axis_0, x = attention_15_masked_scaled_scores_1)[name = string("softmax_31")]; bool attention_15_attention_1_transpose_x_0 = const()[name = string("attention_15_attention_1_transpose_x_0"), val = bool(true)]; bool attention_15_attention_1_transpose_y_0 = const()[name = string("attention_15_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_15_attention_1 = matmul(transpose_x = attention_15_attention_1_transpose_x_0, transpose_y = attention_15_attention_1_transpose_y_0, x = softmax_31, y = 
attention_15_slice_value_cache_heads_1)[name = string("attention_15_attention_1")]; int32 attention_15_concat_attention_all_heads_axis_0 = const()[name = string("attention_15_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_15_concat_attention_all_heads_interleave_0 = const()[name = string("attention_15_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_15_concat_attention_all_heads = concat(axis = attention_15_concat_attention_all_heads_axis_0, interleave = attention_15_concat_attention_all_heads_interleave_0, values = (attention_15_attention_0, attention_15_attention_1))[name = string("attention_15_concat_attention_all_heads")]; tensor attention_15_channels_first_retransposed_perm_0 = const()[name = string("attention_15_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_15_reshaped_shape_0 = const()[name = string("attention_15_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_15_channels_first_retransposed = transpose(perm = attention_15_channels_first_retransposed_perm_0, x = attention_15_concat_attention_all_heads)[name = string("transpose_17")]; tensor attention_15_reshaped = reshape(shape = attention_15_reshaped_shape_0, x = attention_15_channels_first_retransposed)[name = string("attention_15_reshaped")]; tensor attention_15_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466247552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466849728))))[name = string("attention_15_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_71 = constexpr_blockwise_shift_scale(data = attention_15_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466878464))))[name = string("constexpr_blockwise_shift_scale_71")]; tensor attention_15_outproj_strides_0 = const()[name = string("attention_15_outproj_strides_0"), val = tensor([1])]; string attention_15_outproj_pad_type_0 = const()[name = string("attention_15_outproj_pad_type_0"), val = string("valid")]; tensor attention_15_outproj_pad_0 = const()[name = string("attention_15_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_15_outproj_dilations_0 = const()[name = string("attention_15_outproj_dilations_0"), val = tensor([1])]; int32 attention_15_outproj_groups_0 = const()[name = string("attention_15_outproj_groups_0"), val = int32(1)]; tensor attention_15_outproj = conv(dilations = attention_15_outproj_dilations_0, groups = attention_15_outproj_groups_0, pad = attention_15_outproj_pad_0, pad_type = attention_15_outproj_pad_type_0, strides = attention_15_outproj_strides_0, weight = constexpr_blockwise_shift_scale_71, x = attention_15_reshaped)[name = string("attention_15_outproj")]; tensor block_15_residual_1 = add(x = block_14_residual_2, y = attention_15_outproj)[name = string("block_15_residual_1")]; tensor block_15_ffn_rmsnorm_abs = abs(x = block_15_residual_1)[name = string("block_15_ffn_rmsnorm_abs")]; tensor block_15_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_15_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_15_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_15_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_15_ffn_rmsnorm_maxval = reduce_max(axes = block_15_ffn_rmsnorm_maxval_axes_0, keep_dims = block_15_ffn_rmsnorm_maxval_keep_dims_0, x = 
block_15_ffn_rmsnorm_abs)[name = string("block_15_ffn_rmsnorm_maxval")]; fp16 block_15_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_15_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_15_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_15_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_15_ffn_rmsnorm_maxval_clipped = clip(alpha = block_15_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_15_ffn_rmsnorm_maxval_clipped_beta_0, x = block_15_ffn_rmsnorm_maxval)[name = string("block_15_ffn_rmsnorm_maxval_clipped")]; tensor block_15_ffn_rmsnorm_scaled = real_div(x = block_15_residual_1, y = block_15_ffn_rmsnorm_maxval_clipped)[name = string("block_15_ffn_rmsnorm_scaled")]; tensor block_15_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_15_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_15_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_15_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_15_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_15_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_15_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_15_ffn_rmsnorm_scaled)[name = string("block_15_ffn_rmsnorm_squared_sum")]; fp16 block_15_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_15_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_15_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_15_ffn_rmsnorm_rsqrt_epsilon_0, x = block_15_ffn_rmsnorm_squared_sum)[name = string("block_15_ffn_rmsnorm_rsqrt")]; fp16 block_15_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_15_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_15_ffn_rmsnorm_dim_scaled = mul(x = block_15_ffn_rmsnorm_scaled, y = block_15_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_15_ffn_rmsnorm_dim_scaled")]; tensor block_15_ffn_rmsnorm_normalized = mul(x = block_15_ffn_rmsnorm_dim_scaled, y = block_15_ffn_rmsnorm_rsqrt)[name = string("block_15_ffn_rmsnorm_normalized")]; tensor block_15_ffn_rmsnorm_y_0 = const()[name = string("block_15_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466880320)))]; tensor block_15_ffn_rmsnorm = mul(x = block_15_ffn_rmsnorm_normalized, y = block_15_ffn_rmsnorm_y_0)[name = string("block_15_ffn_rmsnorm")]; tensor block_15_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(466882176))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470150848))))[name = string("block_15_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_72 = constexpr_blockwise_shift_scale(data = block_15_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470306560))))[name = string("constexpr_blockwise_shift_scale_72")]; tensor block_15_ffn_inproj_strides_0 = const()[name = string("block_15_ffn_inproj_strides_0"), val = tensor([1])]; string block_15_ffn_inproj_pad_type_0 = const()[name = string("block_15_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_15_ffn_inproj_pad_0 = const()[name = string("block_15_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_15_ffn_inproj_dilations_0 = const()[name = string("block_15_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_15_ffn_inproj_groups_0 = const()[name = 
string("block_15_ffn_inproj_groups_0"), val = int32(1)]; tensor block_15_ffn_inproj = conv(dilations = block_15_ffn_inproj_dilations_0, groups = block_15_ffn_inproj_groups_0, pad = block_15_ffn_inproj_pad_0, pad_type = block_15_ffn_inproj_pad_type_0, strides = block_15_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_72, x = block_15_ffn_rmsnorm)[name = string("block_15_ffn_inproj")]; tensor block_15_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470316352))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473585024))))[name = string("block_15_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_73 = constexpr_blockwise_shift_scale(data = block_15_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473740736))))[name = string("constexpr_blockwise_shift_scale_73")]; tensor block_15_ffn_g_strides_0 = const()[name = string("block_15_ffn_g_strides_0"), val = tensor([1])]; string block_15_ffn_g_pad_type_0 = const()[name = string("block_15_ffn_g_pad_type_0"), val = string("valid")]; tensor block_15_ffn_g_pad_0 = const()[name = string("block_15_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_15_ffn_g_dilations_0 = const()[name = string("block_15_ffn_g_dilations_0"), val = tensor([1])]; int32 block_15_ffn_g_groups_0 = const()[name = string("block_15_ffn_g_groups_0"), val = int32(1)]; tensor block_15_ffn_g = conv(dilations = block_15_ffn_g_dilations_0, groups = block_15_ffn_g_groups_0, pad = block_15_ffn_g_pad_0, pad_type = block_15_ffn_g_pad_type_0, strides = block_15_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_73, x = block_15_ffn_rmsnorm)[name = string("block_15_ffn_g")]; tensor block_15_ffn_g_activation = silu(x = block_15_ffn_g)[name = string("block_15_ffn_g_activation")]; tensor block_15_ffn_x_gated = mul(x = block_15_ffn_inproj, y = block_15_ffn_g_activation)[name = string("block_15_ffn_x_gated")]; tensor block_15_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(473750528))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477019200))))[name = string("block_15_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_74 = constexpr_blockwise_shift_scale(data = block_15_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477047936))))[name = string("constexpr_blockwise_shift_scale_74")]; tensor block_15_ffn_outproj_strides_0 = const()[name = string("block_15_ffn_outproj_strides_0"), val = tensor([1])]; string block_15_ffn_outproj_pad_type_0 = const()[name = string("block_15_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_15_ffn_outproj_pad_0 = const()[name = string("block_15_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_15_ffn_outproj_dilations_0 = const()[name = string("block_15_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_15_ffn_outproj_groups_0 = const()[name = string("block_15_ffn_outproj_groups_0"), val = int32(1)]; tensor block_15_ffn_outproj = conv(dilations = block_15_ffn_outproj_dilations_0, groups = block_15_ffn_outproj_groups_0, pad = block_15_ffn_outproj_pad_0, pad_type = block_15_ffn_outproj_pad_type_0, strides = block_15_ffn_outproj_strides_0, weight = 
constexpr_blockwise_shift_scale_74, x = block_15_ffn_x_gated)[name = string("block_15_ffn_outproj")]; tensor block_15_residual_2 = add(x = block_15_ffn_outproj, y = block_15_residual_1)[name = string("block_15_residual_2")]; tensor block_16_attention_rmsnorm_abs = abs(x = block_15_residual_2)[name = string("block_16_attention_rmsnorm_abs")]; tensor block_16_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_16_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_16_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_16_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_16_attention_rmsnorm_maxval = reduce_max(axes = block_16_attention_rmsnorm_maxval_axes_0, keep_dims = block_16_attention_rmsnorm_maxval_keep_dims_0, x = block_16_attention_rmsnorm_abs)[name = string("block_16_attention_rmsnorm_maxval")]; fp16 block_16_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_16_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_16_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_16_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_16_attention_rmsnorm_maxval_clipped = clip(alpha = block_16_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_16_attention_rmsnorm_maxval_clipped_beta_0, x = block_16_attention_rmsnorm_maxval)[name = string("block_16_attention_rmsnorm_maxval_clipped")]; tensor block_16_attention_rmsnorm_scaled = real_div(x = block_15_residual_2, y = block_16_attention_rmsnorm_maxval_clipped)[name = string("block_16_attention_rmsnorm_scaled")]; tensor block_16_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_16_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_16_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_16_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_16_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_16_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_16_attention_rmsnorm_squared_sum_keep_dims_0, x = block_16_attention_rmsnorm_scaled)[name = string("block_16_attention_rmsnorm_squared_sum")]; fp16 block_16_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_16_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_16_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_16_attention_rmsnorm_rsqrt_epsilon_0, x = block_16_attention_rmsnorm_squared_sum)[name = string("block_16_attention_rmsnorm_rsqrt")]; fp16 block_16_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_16_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_16_attention_rmsnorm_dim_scaled = mul(x = block_16_attention_rmsnorm_scaled, y = block_16_attention_rmsnorm_dim_scaled_y_0)[name = string("block_16_attention_rmsnorm_dim_scaled")]; tensor block_16_attention_rmsnorm_normalized = mul(x = block_16_attention_rmsnorm_dim_scaled, y = block_16_attention_rmsnorm_rsqrt)[name = string("block_16_attention_rmsnorm_normalized")]; tensor block_16_attention_rmsnorm_y_0 = const()[name = string("block_16_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477049792)))]; tensor block_16_attention_rmsnorm = mul(x = block_16_attention_rmsnorm_normalized, y = block_16_attention_rmsnorm_y_0)[name = string("block_16_attention_rmsnorm")]; tensor attention_16_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(477051648))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477825856))))[name = string("attention_16_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_75 = constexpr_blockwise_shift_scale(data = attention_16_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477862784))))[name = string("constexpr_blockwise_shift_scale_75")]; tensor attention_16_qkvproj_bias_0 = const()[name = string("attention_16_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477865152)))]; tensor attention_16_qkvproj_strides_0 = const()[name = string("attention_16_qkvproj_strides_0"), val = tensor([1])]; string attention_16_qkvproj_pad_type_0 = const()[name = string("attention_16_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_16_qkvproj_pad_0 = const()[name = string("attention_16_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_16_qkvproj_dilations_0 = const()[name = string("attention_16_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_16_qkvproj_groups_0 = const()[name = string("attention_16_qkvproj_groups_0"), val = int32(1)]; tensor attention_16_qkvproj = conv(bias = attention_16_qkvproj_bias_0, dilations = attention_16_qkvproj_dilations_0, groups = attention_16_qkvproj_groups_0, pad = attention_16_qkvproj_pad_0, pad_type = attention_16_qkvproj_pad_type_0, strides = attention_16_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_75, x = block_16_attention_rmsnorm)[name = string("attention_16_qkvproj")]; tensor attention_16_head_reshape_shape_0 = const()[name = string("attention_16_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_16_head_reshape = reshape(shape = attention_16_head_reshape_shape_0, x = attention_16_qkvproj)[name = string("attention_16_head_reshape")]; tensor attention_16_head_transpose_perm_0 = const()[name = string("attention_16_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_16_split_qkv_heads_axis_0 = const()[name = string("attention_16_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_16_split_qkv_heads_split_sizes_0 = const()[name = string("attention_16_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_16_head_transpose = transpose(perm = attention_16_head_transpose_perm_0, x = attention_16_head_reshape)[name = string("transpose_16")]; tensor attention_16_split_qkv_heads_0, tensor attention_16_split_qkv_heads_1, tensor attention_16_split_qkv_heads_2 = split(axis = attention_16_split_qkv_heads_axis_0, split_sizes = attention_16_split_qkv_heads_split_sizes_0, x = attention_16_head_transpose)[name = string("attention_16_split_qkv_heads")]; tensor attention_16_q_rope_lhs_mult = mul(x = attention_16_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_16_q_rope_lhs_mult")]; int32 attention_16_q_rotate_half_split_num_splits_0 = const()[name = string("attention_16_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_16_q_rotate_half_split_axis_0 = const()[name = string("attention_16_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_16_q_rotate_half_split_0, tensor attention_16_q_rotate_half_split_1 = split(axis = attention_16_q_rotate_half_split_axis_0, num_splits = attention_16_q_rotate_half_split_num_splits_0, x = attention_16_split_qkv_heads_0)[name = string("attention_16_q_rotate_half_split")]; 
fp16 attention_16_q_rotate_half_neg_y_0 = const()[name = string("attention_16_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_16_q_rotate_half_neg = mul(x = attention_16_q_rotate_half_split_1, y = attention_16_q_rotate_half_neg_y_0)[name = string("attention_16_q_rotate_half_neg")]; int32 attention_16_q_rotate_half_concat_axis_0 = const()[name = string("attention_16_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_16_q_rotate_half_concat_interleave_0 = const()[name = string("attention_16_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_16_q_rotate_half_concat = concat(axis = attention_16_q_rotate_half_concat_axis_0, interleave = attention_16_q_rotate_half_concat_interleave_0, values = (attention_16_q_rotate_half_neg, attention_16_q_rotate_half_split_0))[name = string("attention_16_q_rotate_half_concat")]; tensor attention_16_q_rope_rhs_mult = mul(x = attention_16_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_16_q_rope_rhs_mult")]; tensor attention_16_q_rope = add(x = attention_16_q_rope_lhs_mult, y = attention_16_q_rope_rhs_mult)[name = string("attention_16_q_rope")]; tensor attention_16_k_rope_lhs_mult = mul(x = attention_16_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_16_k_rope_lhs_mult")]; int32 attention_16_k_rotate_half_split_num_splits_0 = const()[name = string("attention_16_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_16_k_rotate_half_split_axis_0 = const()[name = string("attention_16_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_16_k_rotate_half_split_0, tensor attention_16_k_rotate_half_split_1 = split(axis = attention_16_k_rotate_half_split_axis_0, num_splits = attention_16_k_rotate_half_split_num_splits_0, x = attention_16_split_qkv_heads_1)[name = string("attention_16_k_rotate_half_split")]; fp16 attention_16_k_rotate_half_neg_y_0 = const()[name = string("attention_16_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_16_k_rotate_half_neg = mul(x = attention_16_k_rotate_half_split_1, y = attention_16_k_rotate_half_neg_y_0)[name = string("attention_16_k_rotate_half_neg")]; int32 attention_16_k_rotate_half_concat_axis_0 = const()[name = string("attention_16_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_16_k_rotate_half_concat_interleave_0 = const()[name = string("attention_16_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_16_k_rotate_half_concat = concat(axis = attention_16_k_rotate_half_concat_axis_0, interleave = attention_16_k_rotate_half_concat_interleave_0, values = (attention_16_k_rotate_half_neg, attention_16_k_rotate_half_split_0))[name = string("attention_16_k_rotate_half_concat")]; tensor attention_16_k_rope_rhs_mult = mul(x = attention_16_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_16_k_rope_rhs_mult")]; tensor attention_16_k_rope = add(x = attention_16_k_rope_lhs_mult, y = attention_16_k_rope_rhs_mult)[name = string("attention_16_k_rope")]; int32 attention_16_q_splits_axis_0 = const()[name = string("attention_16_q_splits_axis_0"), val = int32(1)]; int32 attention_16_q_splits_num_splits_0 = const()[name = string("attention_16_q_splits_num_splits_0"), val = int32(2)]; tensor attention_16_q_splits_0, tensor attention_16_q_splits_1 = split(axis = attention_16_q_splits_axis_0, num_splits = attention_16_q_splits_num_splits_0, x = attention_16_q_rope)[name = string("attention_16_q_splits")]; tensor attention_16_update_begin_0_values0_0 = const()[name = 
string("attention_16_update_begin_0_values0_0"), val = tensor([16])]; tensor attention_16_update_begin_0_values1_0 = const()[name = string("attention_16_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_16_update_begin_0_values3_0 = const()[name = string("attention_16_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_16_update_begin_0_axis_0 = const()[name = string("attention_16_update_begin_0_axis_0"), val = int32(0)]; bool attention_16_update_begin_0_interleave_0 = const()[name = string("attention_16_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_16_update_begin_0 = concat(axis = attention_16_update_begin_0_axis_0, interleave = attention_16_update_begin_0_interleave_0, values = (attention_16_update_begin_0_values0_0, attention_16_update_begin_0_values1_0, query_pos1, attention_16_update_begin_0_values3_0))[name = string("attention_16_update_begin_0")]; tensor attention_16_update_end_0_values0_0 = const()[name = string("attention_16_update_end_0_values0_0"), val = tensor([17])]; tensor attention_16_update_end_0_values1_0 = const()[name = string("attention_16_update_end_0_values1_0"), val = tensor([2])]; tensor attention_16_update_end_0_values3_0 = const()[name = string("attention_16_update_end_0_values3_0"), val = tensor([64])]; int32 attention_16_update_end_0_axis_0 = const()[name = string("attention_16_update_end_0_axis_0"), val = int32(0)]; bool attention_16_update_end_0_interleave_0 = const()[name = string("attention_16_update_end_0_interleave_0"), val = bool(false)]; tensor attention_16_update_end_0 = concat(axis = attention_16_update_end_0_axis_0, interleave = attention_16_update_end_0_interleave_0, values = (attention_16_update_end_0_values0_0, attention_16_update_end_0_values1_0, end_pos_0, attention_16_update_end_0_values3_0))[name = string("attention_16_update_end_0")]; tensor attention_16_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_16_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_16_updated_key_cache_0 = slice_update(begin = attention_16_update_begin_0, end = attention_16_update_end_0, squeeze_mask = attention_16_updated_key_cache_0_squeeze_mask_0, update = attention_16_k_rope, x = coreml_update_state_30)[name = string("attention_16_updated_key_cache_0")]; write_state(data = attention_16_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_80_write_state")]; tensor coreml_update_state_32 = read_state(input = key_cache_state)[name = string("coreml_update_state_80")]; tensor attention_16_key_cache_begin_0 = const()[name = string("attention_16_key_cache_begin_0"), val = tensor([16, 0, 0, 0])]; tensor attention_16_key_cache_end_0 = const()[name = string("attention_16_key_cache_end_0"), val = tensor([17, 2, 512, 64])]; tensor attention_16_key_cache_squeeze_mask_0 = const()[name = string("attention_16_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_16_key_cache = slice_by_index(begin = attention_16_key_cache_begin_0, end = attention_16_key_cache_end_0, squeeze_mask = attention_16_key_cache_squeeze_mask_0, x = coreml_update_state_32)[name = string("attention_16_key_cache")]; int32 attention_16_key_cache_head_axis_0 = const()[name = string("attention_16_key_cache_head_axis_0"), val = int32(1)]; int32 attention_16_key_cache_head_num_splits_0 = const()[name = string("attention_16_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_16_key_cache_head_0, tensor 
attention_16_key_cache_head_1 = split(axis = attention_16_key_cache_head_axis_0, num_splits = attention_16_key_cache_head_num_splits_0, x = attention_16_key_cache)[name = string("attention_16_key_cache_head")]; tensor attention_16_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_16_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_16_updated_value_cache_0 = slice_update(begin = attention_16_update_begin_0, end = attention_16_update_end_0, squeeze_mask = attention_16_updated_value_cache_0_squeeze_mask_0, update = attention_16_split_qkv_heads_2, x = coreml_update_state_31)[name = string("attention_16_updated_value_cache_0")]; write_state(data = attention_16_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_81_write_state")]; tensor coreml_update_state_33 = read_state(input = value_cache_state)[name = string("coreml_update_state_81")]; tensor attention_16_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_16_slice_current_layer_value_cache_begin_0"), val = tensor([16, 0, 0, 0])]; tensor attention_16_slice_current_layer_value_cache_end_0 = const()[name = string("attention_16_slice_current_layer_value_cache_end_0"), val = tensor([17, 2, 512, 64])]; tensor attention_16_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_16_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_16_slice_current_layer_value_cache = slice_by_index(begin = attention_16_slice_current_layer_value_cache_begin_0, end = attention_16_slice_current_layer_value_cache_end_0, squeeze_mask = attention_16_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_33)[name = string("attention_16_slice_current_layer_value_cache")]; int32 attention_16_slice_value_cache_heads_axis_0 = const()[name = string("attention_16_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_16_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_16_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_16_slice_value_cache_heads_0, tensor attention_16_slice_value_cache_heads_1 = split(axis = attention_16_slice_value_cache_heads_axis_0, num_splits = attention_16_slice_value_cache_heads_num_splits_0, x = attention_16_slice_current_layer_value_cache)[name = string("attention_16_slice_value_cache_heads")]; bool attention_16_scores_0_transpose_y_0 = const()[name = string("attention_16_scores_0_transpose_y_0"), val = bool(true)]; bool attention_16_scores_0_transpose_x_0 = const()[name = string("attention_16_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_16_scores_0 = matmul(transpose_x = attention_16_scores_0_transpose_x_0, transpose_y = attention_16_scores_0_transpose_y_0, x = attention_16_key_cache_head_0, y = attention_16_q_splits_0)[name = string("attention_16_scores_0")]; fp16 attention_16_scaled_scores_0_y_0 = const()[name = string("attention_16_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_16_scaled_scores_0 = mul(x = attention_16_scores_0, y = attention_16_scaled_scores_0_y_0)[name = string("attention_16_scaled_scores_0")]; tensor attention_16_masked_scaled_scores_0 = add(x = attention_16_scaled_scores_0, y = transpose_0)[name = string("attention_16_masked_scaled_scores_0")]; int32 softmax_32_axis_0 = const()[name = string("softmax_32_axis_0"), val = int32(-2)]; tensor softmax_32 = softmax(axis = softmax_32_axis_0, x = 
attention_16_masked_scaled_scores_0)[name = string("softmax_32")]; bool attention_16_attention_0_transpose_x_0 = const()[name = string("attention_16_attention_0_transpose_x_0"), val = bool(true)]; bool attention_16_attention_0_transpose_y_0 = const()[name = string("attention_16_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_16_attention_0 = matmul(transpose_x = attention_16_attention_0_transpose_x_0, transpose_y = attention_16_attention_0_transpose_y_0, x = softmax_32, y = attention_16_slice_value_cache_heads_0)[name = string("attention_16_attention_0")]; bool attention_16_scores_1_transpose_y_0 = const()[name = string("attention_16_scores_1_transpose_y_0"), val = bool(true)]; bool attention_16_scores_1_transpose_x_0 = const()[name = string("attention_16_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_16_scores_1 = matmul(transpose_x = attention_16_scores_1_transpose_x_0, transpose_y = attention_16_scores_1_transpose_y_0, x = attention_16_key_cache_head_1, y = attention_16_q_splits_1)[name = string("attention_16_scores_1")]; fp16 attention_16_scaled_scores_1_y_0 = const()[name = string("attention_16_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_16_scaled_scores_1 = mul(x = attention_16_scores_1, y = attention_16_scaled_scores_1_y_0)[name = string("attention_16_scaled_scores_1")]; tensor attention_16_masked_scaled_scores_1 = add(x = attention_16_scaled_scores_1, y = transpose_0)[name = string("attention_16_masked_scaled_scores_1")]; int32 softmax_33_axis_0 = const()[name = string("softmax_33_axis_0"), val = int32(-2)]; tensor softmax_33 = softmax(axis = softmax_33_axis_0, x = attention_16_masked_scaled_scores_1)[name = string("softmax_33")]; bool attention_16_attention_1_transpose_x_0 = const()[name = string("attention_16_attention_1_transpose_x_0"), val = bool(true)]; bool attention_16_attention_1_transpose_y_0 = const()[name = string("attention_16_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_16_attention_1 = matmul(transpose_x = attention_16_attention_1_transpose_x_0, transpose_y = attention_16_attention_1_transpose_y_0, x = softmax_33, y = attention_16_slice_value_cache_heads_1)[name = string("attention_16_attention_1")]; int32 attention_16_concat_attention_all_heads_axis_0 = const()[name = string("attention_16_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_16_concat_attention_all_heads_interleave_0 = const()[name = string("attention_16_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_16_concat_attention_all_heads = concat(axis = attention_16_concat_attention_all_heads_axis_0, interleave = attention_16_concat_attention_all_heads_interleave_0, values = (attention_16_attention_0, attention_16_attention_1))[name = string("attention_16_concat_attention_all_heads")]; tensor attention_16_channels_first_retransposed_perm_0 = const()[name = string("attention_16_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_16_reshaped_shape_0 = const()[name = string("attention_16_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_16_channels_first_retransposed = transpose(perm = attention_16_channels_first_retransposed_perm_0, x = attention_16_concat_attention_all_heads)[name = string("transpose_15")]; tensor attention_16_reshaped = reshape(shape = attention_16_reshaped_shape_0, x = attention_16_channels_first_retransposed)[name = string("attention_16_reshaped")]; tensor attention_16_outproj_weight_dequantization = 
constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(477867520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478469696))))[name = string("attention_16_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_76 = constexpr_blockwise_shift_scale(data = attention_16_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478498432))))[name = string("constexpr_blockwise_shift_scale_76")]; tensor attention_16_outproj_strides_0 = const()[name = string("attention_16_outproj_strides_0"), val = tensor([1])]; string attention_16_outproj_pad_type_0 = const()[name = string("attention_16_outproj_pad_type_0"), val = string("valid")]; tensor attention_16_outproj_pad_0 = const()[name = string("attention_16_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_16_outproj_dilations_0 = const()[name = string("attention_16_outproj_dilations_0"), val = tensor([1])]; int32 attention_16_outproj_groups_0 = const()[name = string("attention_16_outproj_groups_0"), val = int32(1)]; tensor attention_16_outproj = conv(dilations = attention_16_outproj_dilations_0, groups = attention_16_outproj_groups_0, pad = attention_16_outproj_pad_0, pad_type = attention_16_outproj_pad_type_0, strides = attention_16_outproj_strides_0, weight = constexpr_blockwise_shift_scale_76, x = attention_16_reshaped)[name = string("attention_16_outproj")]; tensor block_16_residual_1 = add(x = block_15_residual_2, y = attention_16_outproj)[name = string("block_16_residual_1")]; tensor block_16_ffn_rmsnorm_abs = abs(x = block_16_residual_1)[name = string("block_16_ffn_rmsnorm_abs")]; tensor block_16_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_16_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_16_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_16_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_16_ffn_rmsnorm_maxval = reduce_max(axes = block_16_ffn_rmsnorm_maxval_axes_0, keep_dims = block_16_ffn_rmsnorm_maxval_keep_dims_0, x = block_16_ffn_rmsnorm_abs)[name = string("block_16_ffn_rmsnorm_maxval")]; fp16 block_16_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_16_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_16_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_16_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_16_ffn_rmsnorm_maxval_clipped = clip(alpha = block_16_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_16_ffn_rmsnorm_maxval_clipped_beta_0, x = block_16_ffn_rmsnorm_maxval)[name = string("block_16_ffn_rmsnorm_maxval_clipped")]; tensor block_16_ffn_rmsnorm_scaled = real_div(x = block_16_residual_1, y = block_16_ffn_rmsnorm_maxval_clipped)[name = string("block_16_ffn_rmsnorm_scaled")]; tensor block_16_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_16_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_16_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_16_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_16_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_16_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_16_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_16_ffn_rmsnorm_scaled)[name = string("block_16_ffn_rmsnorm_squared_sum")]; fp16 block_16_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_16_ffn_rmsnorm_rsqrt_epsilon_0"), val = 
fp16(0x1p-14)]; tensor block_16_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_16_ffn_rmsnorm_rsqrt_epsilon_0, x = block_16_ffn_rmsnorm_squared_sum)[name = string("block_16_ffn_rmsnorm_rsqrt")]; fp16 block_16_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_16_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_16_ffn_rmsnorm_dim_scaled = mul(x = block_16_ffn_rmsnorm_scaled, y = block_16_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_16_ffn_rmsnorm_dim_scaled")]; tensor block_16_ffn_rmsnorm_normalized = mul(x = block_16_ffn_rmsnorm_dim_scaled, y = block_16_ffn_rmsnorm_rsqrt)[name = string("block_16_ffn_rmsnorm_normalized")]; tensor block_16_ffn_rmsnorm_y_0 = const()[name = string("block_16_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478500288)))]; tensor block_16_ffn_rmsnorm = mul(x = block_16_ffn_rmsnorm_normalized, y = block_16_ffn_rmsnorm_y_0)[name = string("block_16_ffn_rmsnorm")]; tensor block_16_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(478502144))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481770816))))[name = string("block_16_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_77 = constexpr_blockwise_shift_scale(data = block_16_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481926528))))[name = string("constexpr_blockwise_shift_scale_77")]; tensor block_16_ffn_inproj_strides_0 = const()[name = string("block_16_ffn_inproj_strides_0"), val = tensor([1])]; string block_16_ffn_inproj_pad_type_0 = const()[name = string("block_16_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_16_ffn_inproj_pad_0 = const()[name = string("block_16_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_16_ffn_inproj_dilations_0 = const()[name = string("block_16_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_16_ffn_inproj_groups_0 = const()[name = string("block_16_ffn_inproj_groups_0"), val = int32(1)]; tensor block_16_ffn_inproj = conv(dilations = block_16_ffn_inproj_dilations_0, groups = block_16_ffn_inproj_groups_0, pad = block_16_ffn_inproj_pad_0, pad_type = block_16_ffn_inproj_pad_type_0, strides = block_16_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_77, x = block_16_ffn_rmsnorm)[name = string("block_16_ffn_inproj")]; tensor block_16_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481936320))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485204992))))[name = string("block_16_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_78 = constexpr_blockwise_shift_scale(data = block_16_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485360704))))[name = string("constexpr_blockwise_shift_scale_78")]; tensor block_16_ffn_g_strides_0 = const()[name = string("block_16_ffn_g_strides_0"), val = tensor([1])]; string block_16_ffn_g_pad_type_0 = const()[name = string("block_16_ffn_g_pad_type_0"), val = string("valid")]; tensor block_16_ffn_g_pad_0 = const()[name = string("block_16_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_16_ffn_g_dilations_0 = const()[name = string("block_16_ffn_g_dilations_0"), val = 
tensor([1])]; int32 block_16_ffn_g_groups_0 = const()[name = string("block_16_ffn_g_groups_0"), val = int32(1)]; tensor block_16_ffn_g = conv(dilations = block_16_ffn_g_dilations_0, groups = block_16_ffn_g_groups_0, pad = block_16_ffn_g_pad_0, pad_type = block_16_ffn_g_pad_type_0, strides = block_16_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_78, x = block_16_ffn_rmsnorm)[name = string("block_16_ffn_g")]; tensor block_16_ffn_g_activation = silu(x = block_16_ffn_g)[name = string("block_16_ffn_g_activation")]; tensor block_16_ffn_x_gated = mul(x = block_16_ffn_inproj, y = block_16_ffn_g_activation)[name = string("block_16_ffn_x_gated")]; tensor block_16_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(485370496))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488639168))))[name = string("block_16_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_79 = constexpr_blockwise_shift_scale(data = block_16_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488667904))))[name = string("constexpr_blockwise_shift_scale_79")]; tensor block_16_ffn_outproj_strides_0 = const()[name = string("block_16_ffn_outproj_strides_0"), val = tensor([1])]; string block_16_ffn_outproj_pad_type_0 = const()[name = string("block_16_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_16_ffn_outproj_pad_0 = const()[name = string("block_16_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_16_ffn_outproj_dilations_0 = const()[name = string("block_16_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_16_ffn_outproj_groups_0 = const()[name = string("block_16_ffn_outproj_groups_0"), val = int32(1)]; tensor block_16_ffn_outproj = conv(dilations = block_16_ffn_outproj_dilations_0, groups = block_16_ffn_outproj_groups_0, pad = block_16_ffn_outproj_pad_0, pad_type = block_16_ffn_outproj_pad_type_0, strides = block_16_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_79, x = block_16_ffn_x_gated)[name = string("block_16_ffn_outproj")]; tensor block_16_residual_2 = add(x = block_16_ffn_outproj, y = block_16_residual_1)[name = string("block_16_residual_2")]; tensor block_17_attention_rmsnorm_abs = abs(x = block_16_residual_2)[name = string("block_17_attention_rmsnorm_abs")]; tensor block_17_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_17_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_17_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_17_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_17_attention_rmsnorm_maxval = reduce_max(axes = block_17_attention_rmsnorm_maxval_axes_0, keep_dims = block_17_attention_rmsnorm_maxval_keep_dims_0, x = block_17_attention_rmsnorm_abs)[name = string("block_17_attention_rmsnorm_maxval")]; fp16 block_17_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_17_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_17_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_17_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_17_attention_rmsnorm_maxval_clipped = clip(alpha = block_17_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_17_attention_rmsnorm_maxval_clipped_beta_0, x = block_17_attention_rmsnorm_maxval)[name = 
string("block_17_attention_rmsnorm_maxval_clipped")]; tensor block_17_attention_rmsnorm_scaled = real_div(x = block_16_residual_2, y = block_17_attention_rmsnorm_maxval_clipped)[name = string("block_17_attention_rmsnorm_scaled")]; tensor block_17_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_17_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_17_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_17_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_17_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_17_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_17_attention_rmsnorm_squared_sum_keep_dims_0, x = block_17_attention_rmsnorm_scaled)[name = string("block_17_attention_rmsnorm_squared_sum")]; fp16 block_17_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_17_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_17_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_17_attention_rmsnorm_rsqrt_epsilon_0, x = block_17_attention_rmsnorm_squared_sum)[name = string("block_17_attention_rmsnorm_rsqrt")]; fp16 block_17_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_17_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_17_attention_rmsnorm_dim_scaled = mul(x = block_17_attention_rmsnorm_scaled, y = block_17_attention_rmsnorm_dim_scaled_y_0)[name = string("block_17_attention_rmsnorm_dim_scaled")]; tensor block_17_attention_rmsnorm_normalized = mul(x = block_17_attention_rmsnorm_dim_scaled, y = block_17_attention_rmsnorm_rsqrt)[name = string("block_17_attention_rmsnorm_normalized")]; tensor block_17_attention_rmsnorm_y_0 = const()[name = string("block_17_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488669760)))]; tensor block_17_attention_rmsnorm = mul(x = block_17_attention_rmsnorm_normalized, y = block_17_attention_rmsnorm_y_0)[name = string("block_17_attention_rmsnorm")]; tensor attention_17_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488671616))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489445824))))[name = string("attention_17_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_80 = constexpr_blockwise_shift_scale(data = attention_17_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489482752))))[name = string("constexpr_blockwise_shift_scale_80")]; tensor attention_17_qkvproj_bias_0 = const()[name = string("attention_17_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489485120)))]; tensor attention_17_qkvproj_strides_0 = const()[name = string("attention_17_qkvproj_strides_0"), val = tensor([1])]; string attention_17_qkvproj_pad_type_0 = const()[name = string("attention_17_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_17_qkvproj_pad_0 = const()[name = string("attention_17_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_17_qkvproj_dilations_0 = const()[name = string("attention_17_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_17_qkvproj_groups_0 = const()[name = string("attention_17_qkvproj_groups_0"), val = int32(1)]; tensor attention_17_qkvproj = conv(bias = attention_17_qkvproj_bias_0, dilations = 
attention_17_qkvproj_dilations_0, groups = attention_17_qkvproj_groups_0, pad = attention_17_qkvproj_pad_0, pad_type = attention_17_qkvproj_pad_type_0, strides = attention_17_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_80, x = block_17_attention_rmsnorm)[name = string("attention_17_qkvproj")]; tensor attention_17_head_reshape_shape_0 = const()[name = string("attention_17_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_17_head_reshape = reshape(shape = attention_17_head_reshape_shape_0, x = attention_17_qkvproj)[name = string("attention_17_head_reshape")]; tensor attention_17_head_transpose_perm_0 = const()[name = string("attention_17_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_17_split_qkv_heads_axis_0 = const()[name = string("attention_17_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_17_split_qkv_heads_split_sizes_0 = const()[name = string("attention_17_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_17_head_transpose = transpose(perm = attention_17_head_transpose_perm_0, x = attention_17_head_reshape)[name = string("transpose_14")]; tensor attention_17_split_qkv_heads_0, tensor attention_17_split_qkv_heads_1, tensor attention_17_split_qkv_heads_2 = split(axis = attention_17_split_qkv_heads_axis_0, split_sizes = attention_17_split_qkv_heads_split_sizes_0, x = attention_17_head_transpose)[name = string("attention_17_split_qkv_heads")]; tensor attention_17_q_rope_lhs_mult = mul(x = attention_17_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_17_q_rope_lhs_mult")]; int32 attention_17_q_rotate_half_split_num_splits_0 = const()[name = string("attention_17_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_17_q_rotate_half_split_axis_0 = const()[name = string("attention_17_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_17_q_rotate_half_split_0, tensor attention_17_q_rotate_half_split_1 = split(axis = attention_17_q_rotate_half_split_axis_0, num_splits = attention_17_q_rotate_half_split_num_splits_0, x = attention_17_split_qkv_heads_0)[name = string("attention_17_q_rotate_half_split")]; fp16 attention_17_q_rotate_half_neg_y_0 = const()[name = string("attention_17_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_17_q_rotate_half_neg = mul(x = attention_17_q_rotate_half_split_1, y = attention_17_q_rotate_half_neg_y_0)[name = string("attention_17_q_rotate_half_neg")]; int32 attention_17_q_rotate_half_concat_axis_0 = const()[name = string("attention_17_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_17_q_rotate_half_concat_interleave_0 = const()[name = string("attention_17_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_17_q_rotate_half_concat = concat(axis = attention_17_q_rotate_half_concat_axis_0, interleave = attention_17_q_rotate_half_concat_interleave_0, values = (attention_17_q_rotate_half_neg, attention_17_q_rotate_half_split_0))[name = string("attention_17_q_rotate_half_concat")]; tensor attention_17_q_rope_rhs_mult = mul(x = attention_17_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_17_q_rope_rhs_mult")]; tensor attention_17_q_rope = add(x = attention_17_q_rope_lhs_mult, y = attention_17_q_rope_rhs_mult)[name = string("attention_17_q_rope")]; tensor attention_17_k_rope_lhs_mult = mul(x = attention_17_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_17_k_rope_lhs_mult")]; int32 attention_17_k_rotate_half_split_num_splits_0 = const()[name = 
string("attention_17_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_17_k_rotate_half_split_axis_0 = const()[name = string("attention_17_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_17_k_rotate_half_split_0, tensor attention_17_k_rotate_half_split_1 = split(axis = attention_17_k_rotate_half_split_axis_0, num_splits = attention_17_k_rotate_half_split_num_splits_0, x = attention_17_split_qkv_heads_1)[name = string("attention_17_k_rotate_half_split")]; fp16 attention_17_k_rotate_half_neg_y_0 = const()[name = string("attention_17_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_17_k_rotate_half_neg = mul(x = attention_17_k_rotate_half_split_1, y = attention_17_k_rotate_half_neg_y_0)[name = string("attention_17_k_rotate_half_neg")]; int32 attention_17_k_rotate_half_concat_axis_0 = const()[name = string("attention_17_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_17_k_rotate_half_concat_interleave_0 = const()[name = string("attention_17_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_17_k_rotate_half_concat = concat(axis = attention_17_k_rotate_half_concat_axis_0, interleave = attention_17_k_rotate_half_concat_interleave_0, values = (attention_17_k_rotate_half_neg, attention_17_k_rotate_half_split_0))[name = string("attention_17_k_rotate_half_concat")]; tensor attention_17_k_rope_rhs_mult = mul(x = attention_17_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_17_k_rope_rhs_mult")]; tensor attention_17_k_rope = add(x = attention_17_k_rope_lhs_mult, y = attention_17_k_rope_rhs_mult)[name = string("attention_17_k_rope")]; int32 attention_17_q_splits_axis_0 = const()[name = string("attention_17_q_splits_axis_0"), val = int32(1)]; int32 attention_17_q_splits_num_splits_0 = const()[name = string("attention_17_q_splits_num_splits_0"), val = int32(2)]; tensor attention_17_q_splits_0, tensor attention_17_q_splits_1 = split(axis = attention_17_q_splits_axis_0, num_splits = attention_17_q_splits_num_splits_0, x = attention_17_q_rope)[name = string("attention_17_q_splits")]; tensor attention_17_update_begin_0_values0_0 = const()[name = string("attention_17_update_begin_0_values0_0"), val = tensor([17])]; tensor attention_17_update_begin_0_values1_0 = const()[name = string("attention_17_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_17_update_begin_0_values3_0 = const()[name = string("attention_17_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_17_update_begin_0_axis_0 = const()[name = string("attention_17_update_begin_0_axis_0"), val = int32(0)]; bool attention_17_update_begin_0_interleave_0 = const()[name = string("attention_17_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_17_update_begin_0 = concat(axis = attention_17_update_begin_0_axis_0, interleave = attention_17_update_begin_0_interleave_0, values = (attention_17_update_begin_0_values0_0, attention_17_update_begin_0_values1_0, query_pos1, attention_17_update_begin_0_values3_0))[name = string("attention_17_update_begin_0")]; tensor attention_17_update_end_0_values0_0 = const()[name = string("attention_17_update_end_0_values0_0"), val = tensor([18])]; tensor attention_17_update_end_0_values1_0 = const()[name = string("attention_17_update_end_0_values1_0"), val = tensor([2])]; tensor attention_17_update_end_0_values3_0 = const()[name = string("attention_17_update_end_0_values3_0"), val = tensor([64])]; int32 attention_17_update_end_0_axis_0 = const()[name = 
string("attention_17_update_end_0_axis_0"), val = int32(0)]; bool attention_17_update_end_0_interleave_0 = const()[name = string("attention_17_update_end_0_interleave_0"), val = bool(false)]; tensor attention_17_update_end_0 = concat(axis = attention_17_update_end_0_axis_0, interleave = attention_17_update_end_0_interleave_0, values = (attention_17_update_end_0_values0_0, attention_17_update_end_0_values1_0, end_pos_0, attention_17_update_end_0_values3_0))[name = string("attention_17_update_end_0")]; tensor attention_17_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_17_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_17_updated_key_cache_0 = slice_update(begin = attention_17_update_begin_0, end = attention_17_update_end_0, squeeze_mask = attention_17_updated_key_cache_0_squeeze_mask_0, update = attention_17_k_rope, x = coreml_update_state_32)[name = string("attention_17_updated_key_cache_0")]; write_state(data = attention_17_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_82_write_state")]; tensor coreml_update_state_34 = read_state(input = key_cache_state)[name = string("coreml_update_state_82")]; tensor attention_17_key_cache_begin_0 = const()[name = string("attention_17_key_cache_begin_0"), val = tensor([17, 0, 0, 0])]; tensor attention_17_key_cache_end_0 = const()[name = string("attention_17_key_cache_end_0"), val = tensor([18, 2, 512, 64])]; tensor attention_17_key_cache_squeeze_mask_0 = const()[name = string("attention_17_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_17_key_cache = slice_by_index(begin = attention_17_key_cache_begin_0, end = attention_17_key_cache_end_0, squeeze_mask = attention_17_key_cache_squeeze_mask_0, x = coreml_update_state_34)[name = string("attention_17_key_cache")]; int32 attention_17_key_cache_head_axis_0 = const()[name = string("attention_17_key_cache_head_axis_0"), val = int32(1)]; int32 attention_17_key_cache_head_num_splits_0 = const()[name = string("attention_17_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_17_key_cache_head_0, tensor attention_17_key_cache_head_1 = split(axis = attention_17_key_cache_head_axis_0, num_splits = attention_17_key_cache_head_num_splits_0, x = attention_17_key_cache)[name = string("attention_17_key_cache_head")]; tensor attention_17_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_17_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_17_updated_value_cache_0 = slice_update(begin = attention_17_update_begin_0, end = attention_17_update_end_0, squeeze_mask = attention_17_updated_value_cache_0_squeeze_mask_0, update = attention_17_split_qkv_heads_2, x = coreml_update_state_33)[name = string("attention_17_updated_value_cache_0")]; write_state(data = attention_17_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_83_write_state")]; tensor coreml_update_state_35 = read_state(input = value_cache_state)[name = string("coreml_update_state_83")]; tensor attention_17_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_17_slice_current_layer_value_cache_begin_0"), val = tensor([17, 0, 0, 0])]; tensor attention_17_slice_current_layer_value_cache_end_0 = const()[name = string("attention_17_slice_current_layer_value_cache_end_0"), val = tensor([18, 2, 512, 64])]; tensor attention_17_slice_current_layer_value_cache_squeeze_mask_0 = 
const()[name = string("attention_17_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_17_slice_current_layer_value_cache = slice_by_index(begin = attention_17_slice_current_layer_value_cache_begin_0, end = attention_17_slice_current_layer_value_cache_end_0, squeeze_mask = attention_17_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_35)[name = string("attention_17_slice_current_layer_value_cache")]; int32 attention_17_slice_value_cache_heads_axis_0 = const()[name = string("attention_17_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_17_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_17_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_17_slice_value_cache_heads_0, tensor attention_17_slice_value_cache_heads_1 = split(axis = attention_17_slice_value_cache_heads_axis_0, num_splits = attention_17_slice_value_cache_heads_num_splits_0, x = attention_17_slice_current_layer_value_cache)[name = string("attention_17_slice_value_cache_heads")]; bool attention_17_scores_0_transpose_y_0 = const()[name = string("attention_17_scores_0_transpose_y_0"), val = bool(true)]; bool attention_17_scores_0_transpose_x_0 = const()[name = string("attention_17_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_17_scores_0 = matmul(transpose_x = attention_17_scores_0_transpose_x_0, transpose_y = attention_17_scores_0_transpose_y_0, x = attention_17_key_cache_head_0, y = attention_17_q_splits_0)[name = string("attention_17_scores_0")]; fp16 attention_17_scaled_scores_0_y_0 = const()[name = string("attention_17_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_17_scaled_scores_0 = mul(x = attention_17_scores_0, y = attention_17_scaled_scores_0_y_0)[name = string("attention_17_scaled_scores_0")]; tensor attention_17_masked_scaled_scores_0 = add(x = attention_17_scaled_scores_0, y = transpose_0)[name = string("attention_17_masked_scaled_scores_0")]; int32 softmax_34_axis_0 = const()[name = string("softmax_34_axis_0"), val = int32(-2)]; tensor softmax_34 = softmax(axis = softmax_34_axis_0, x = attention_17_masked_scaled_scores_0)[name = string("softmax_34")]; bool attention_17_attention_0_transpose_x_0 = const()[name = string("attention_17_attention_0_transpose_x_0"), val = bool(true)]; bool attention_17_attention_0_transpose_y_0 = const()[name = string("attention_17_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_17_attention_0 = matmul(transpose_x = attention_17_attention_0_transpose_x_0, transpose_y = attention_17_attention_0_transpose_y_0, x = softmax_34, y = attention_17_slice_value_cache_heads_0)[name = string("attention_17_attention_0")]; bool attention_17_scores_1_transpose_y_0 = const()[name = string("attention_17_scores_1_transpose_y_0"), val = bool(true)]; bool attention_17_scores_1_transpose_x_0 = const()[name = string("attention_17_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_17_scores_1 = matmul(transpose_x = attention_17_scores_1_transpose_x_0, transpose_y = attention_17_scores_1_transpose_y_0, x = attention_17_key_cache_head_1, y = attention_17_q_splits_1)[name = string("attention_17_scores_1")]; fp16 attention_17_scaled_scores_1_y_0 = const()[name = string("attention_17_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_17_scaled_scores_1 = mul(x = attention_17_scores_1, y = attention_17_scaled_scores_1_y_0)[name = string("attention_17_scaled_scores_1")]; tensor 
attention_17_masked_scaled_scores_1 = add(x = attention_17_scaled_scores_1, y = transpose_0)[name = string("attention_17_masked_scaled_scores_1")]; int32 softmax_35_axis_0 = const()[name = string("softmax_35_axis_0"), val = int32(-2)]; tensor softmax_35 = softmax(axis = softmax_35_axis_0, x = attention_17_masked_scaled_scores_1)[name = string("softmax_35")]; bool attention_17_attention_1_transpose_x_0 = const()[name = string("attention_17_attention_1_transpose_x_0"), val = bool(true)]; bool attention_17_attention_1_transpose_y_0 = const()[name = string("attention_17_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_17_attention_1 = matmul(transpose_x = attention_17_attention_1_transpose_x_0, transpose_y = attention_17_attention_1_transpose_y_0, x = softmax_35, y = attention_17_slice_value_cache_heads_1)[name = string("attention_17_attention_1")]; int32 attention_17_concat_attention_all_heads_axis_0 = const()[name = string("attention_17_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_17_concat_attention_all_heads_interleave_0 = const()[name = string("attention_17_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_17_concat_attention_all_heads = concat(axis = attention_17_concat_attention_all_heads_axis_0, interleave = attention_17_concat_attention_all_heads_interleave_0, values = (attention_17_attention_0, attention_17_attention_1))[name = string("attention_17_concat_attention_all_heads")]; tensor attention_17_channels_first_retransposed_perm_0 = const()[name = string("attention_17_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_17_reshaped_shape_0 = const()[name = string("attention_17_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_17_channels_first_retransposed = transpose(perm = attention_17_channels_first_retransposed_perm_0, x = attention_17_concat_attention_all_heads)[name = string("transpose_13")]; tensor attention_17_reshaped = reshape(shape = attention_17_reshaped_shape_0, x = attention_17_channels_first_retransposed)[name = string("attention_17_reshaped")]; tensor attention_17_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489487488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490089664))))[name = string("attention_17_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_81 = constexpr_blockwise_shift_scale(data = attention_17_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490118400))))[name = string("constexpr_blockwise_shift_scale_81")]; tensor attention_17_outproj_strides_0 = const()[name = string("attention_17_outproj_strides_0"), val = tensor([1])]; string attention_17_outproj_pad_type_0 = const()[name = string("attention_17_outproj_pad_type_0"), val = string("valid")]; tensor attention_17_outproj_pad_0 = const()[name = string("attention_17_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_17_outproj_dilations_0 = const()[name = string("attention_17_outproj_dilations_0"), val = tensor([1])]; int32 attention_17_outproj_groups_0 = const()[name = string("attention_17_outproj_groups_0"), val = int32(1)]; tensor attention_17_outproj = conv(dilations = attention_17_outproj_dilations_0, groups = attention_17_outproj_groups_0, pad = attention_17_outproj_pad_0, pad_type = attention_17_outproj_pad_type_0, strides = 
attention_17_outproj_strides_0, weight = constexpr_blockwise_shift_scale_81, x = attention_17_reshaped)[name = string("attention_17_outproj")]; tensor block_17_residual_1 = add(x = block_16_residual_2, y = attention_17_outproj)[name = string("block_17_residual_1")]; tensor block_17_ffn_rmsnorm_abs = abs(x = block_17_residual_1)[name = string("block_17_ffn_rmsnorm_abs")]; tensor block_17_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_17_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_17_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_17_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_17_ffn_rmsnorm_maxval = reduce_max(axes = block_17_ffn_rmsnorm_maxval_axes_0, keep_dims = block_17_ffn_rmsnorm_maxval_keep_dims_0, x = block_17_ffn_rmsnorm_abs)[name = string("block_17_ffn_rmsnorm_maxval")]; fp16 block_17_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_17_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_17_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_17_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_17_ffn_rmsnorm_maxval_clipped = clip(alpha = block_17_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_17_ffn_rmsnorm_maxval_clipped_beta_0, x = block_17_ffn_rmsnorm_maxval)[name = string("block_17_ffn_rmsnorm_maxval_clipped")]; tensor block_17_ffn_rmsnorm_scaled = real_div(x = block_17_residual_1, y = block_17_ffn_rmsnorm_maxval_clipped)[name = string("block_17_ffn_rmsnorm_scaled")]; tensor block_17_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_17_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_17_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_17_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_17_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_17_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_17_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_17_ffn_rmsnorm_scaled)[name = string("block_17_ffn_rmsnorm_squared_sum")]; fp16 block_17_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_17_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_17_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_17_ffn_rmsnorm_rsqrt_epsilon_0, x = block_17_ffn_rmsnorm_squared_sum)[name = string("block_17_ffn_rmsnorm_rsqrt")]; fp16 block_17_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_17_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_17_ffn_rmsnorm_dim_scaled = mul(x = block_17_ffn_rmsnorm_scaled, y = block_17_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_17_ffn_rmsnorm_dim_scaled")]; tensor block_17_ffn_rmsnorm_normalized = mul(x = block_17_ffn_rmsnorm_dim_scaled, y = block_17_ffn_rmsnorm_rsqrt)[name = string("block_17_ffn_rmsnorm_normalized")]; tensor block_17_ffn_rmsnorm_y_0 = const()[name = string("block_17_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490120256)))]; tensor block_17_ffn_rmsnorm = mul(x = block_17_ffn_rmsnorm_normalized, y = block_17_ffn_rmsnorm_y_0)[name = string("block_17_ffn_rmsnorm")]; tensor block_17_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(490122112))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493390784))))[name = string("block_17_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_82 = 
constexpr_blockwise_shift_scale(data = block_17_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493546496))))[name = string("constexpr_blockwise_shift_scale_82")]; tensor block_17_ffn_inproj_strides_0 = const()[name = string("block_17_ffn_inproj_strides_0"), val = tensor([1])]; string block_17_ffn_inproj_pad_type_0 = const()[name = string("block_17_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_17_ffn_inproj_pad_0 = const()[name = string("block_17_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_17_ffn_inproj_dilations_0 = const()[name = string("block_17_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_17_ffn_inproj_groups_0 = const()[name = string("block_17_ffn_inproj_groups_0"), val = int32(1)]; tensor block_17_ffn_inproj = conv(dilations = block_17_ffn_inproj_dilations_0, groups = block_17_ffn_inproj_groups_0, pad = block_17_ffn_inproj_pad_0, pad_type = block_17_ffn_inproj_pad_type_0, strides = block_17_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_82, x = block_17_ffn_rmsnorm)[name = string("block_17_ffn_inproj")]; tensor block_17_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493556288))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496824960))))[name = string("block_17_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_83 = constexpr_blockwise_shift_scale(data = block_17_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496980672))))[name = string("constexpr_blockwise_shift_scale_83")]; tensor block_17_ffn_g_strides_0 = const()[name = string("block_17_ffn_g_strides_0"), val = tensor([1])]; string block_17_ffn_g_pad_type_0 = const()[name = string("block_17_ffn_g_pad_type_0"), val = string("valid")]; tensor block_17_ffn_g_pad_0 = const()[name = string("block_17_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_17_ffn_g_dilations_0 = const()[name = string("block_17_ffn_g_dilations_0"), val = tensor([1])]; int32 block_17_ffn_g_groups_0 = const()[name = string("block_17_ffn_g_groups_0"), val = int32(1)]; tensor block_17_ffn_g = conv(dilations = block_17_ffn_g_dilations_0, groups = block_17_ffn_g_groups_0, pad = block_17_ffn_g_pad_0, pad_type = block_17_ffn_g_pad_type_0, strides = block_17_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_83, x = block_17_ffn_rmsnorm)[name = string("block_17_ffn_g")]; tensor block_17_ffn_g_activation = silu(x = block_17_ffn_g)[name = string("block_17_ffn_g_activation")]; tensor block_17_ffn_x_gated = mul(x = block_17_ffn_inproj, y = block_17_ffn_g_activation)[name = string("block_17_ffn_x_gated")]; tensor block_17_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(496990464))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500259136))))[name = string("block_17_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_84 = constexpr_blockwise_shift_scale(data = block_17_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500287872))))[name = string("constexpr_blockwise_shift_scale_84")]; tensor block_17_ffn_outproj_strides_0 = const()[name = 
string("block_17_ffn_outproj_strides_0"), val = tensor([1])]; string block_17_ffn_outproj_pad_type_0 = const()[name = string("block_17_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_17_ffn_outproj_pad_0 = const()[name = string("block_17_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_17_ffn_outproj_dilations_0 = const()[name = string("block_17_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_17_ffn_outproj_groups_0 = const()[name = string("block_17_ffn_outproj_groups_0"), val = int32(1)]; tensor block_17_ffn_outproj = conv(dilations = block_17_ffn_outproj_dilations_0, groups = block_17_ffn_outproj_groups_0, pad = block_17_ffn_outproj_pad_0, pad_type = block_17_ffn_outproj_pad_type_0, strides = block_17_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_84, x = block_17_ffn_x_gated)[name = string("block_17_ffn_outproj")]; tensor block_17_residual_2 = add(x = block_17_ffn_outproj, y = block_17_residual_1)[name = string("block_17_residual_2")]; tensor block_18_attention_rmsnorm_abs = abs(x = block_17_residual_2)[name = string("block_18_attention_rmsnorm_abs")]; tensor block_18_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_18_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_18_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_18_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_18_attention_rmsnorm_maxval = reduce_max(axes = block_18_attention_rmsnorm_maxval_axes_0, keep_dims = block_18_attention_rmsnorm_maxval_keep_dims_0, x = block_18_attention_rmsnorm_abs)[name = string("block_18_attention_rmsnorm_maxval")]; fp16 block_18_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_18_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_18_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_18_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_18_attention_rmsnorm_maxval_clipped = clip(alpha = block_18_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_18_attention_rmsnorm_maxval_clipped_beta_0, x = block_18_attention_rmsnorm_maxval)[name = string("block_18_attention_rmsnorm_maxval_clipped")]; tensor block_18_attention_rmsnorm_scaled = real_div(x = block_17_residual_2, y = block_18_attention_rmsnorm_maxval_clipped)[name = string("block_18_attention_rmsnorm_scaled")]; tensor block_18_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_18_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_18_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_18_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_18_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_18_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_18_attention_rmsnorm_squared_sum_keep_dims_0, x = block_18_attention_rmsnorm_scaled)[name = string("block_18_attention_rmsnorm_squared_sum")]; fp16 block_18_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_18_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_18_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_18_attention_rmsnorm_rsqrt_epsilon_0, x = block_18_attention_rmsnorm_squared_sum)[name = string("block_18_attention_rmsnorm_rsqrt")]; fp16 block_18_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_18_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_18_attention_rmsnorm_dim_scaled = mul(x = 
block_18_attention_rmsnorm_scaled, y = block_18_attention_rmsnorm_dim_scaled_y_0)[name = string("block_18_attention_rmsnorm_dim_scaled")]; tensor block_18_attention_rmsnorm_normalized = mul(x = block_18_attention_rmsnorm_dim_scaled, y = block_18_attention_rmsnorm_rsqrt)[name = string("block_18_attention_rmsnorm_normalized")]; tensor block_18_attention_rmsnorm_y_0 = const()[name = string("block_18_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500289728)))]; tensor block_18_attention_rmsnorm = mul(x = block_18_attention_rmsnorm_normalized, y = block_18_attention_rmsnorm_y_0)[name = string("block_18_attention_rmsnorm")]; tensor attention_18_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(500291584))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501065792))))[name = string("attention_18_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_85 = constexpr_blockwise_shift_scale(data = attention_18_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501102720))))[name = string("constexpr_blockwise_shift_scale_85")]; tensor attention_18_qkvproj_bias_0 = const()[name = string("attention_18_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501105088)))]; tensor attention_18_qkvproj_strides_0 = const()[name = string("attention_18_qkvproj_strides_0"), val = tensor([1])]; string attention_18_qkvproj_pad_type_0 = const()[name = string("attention_18_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_18_qkvproj_pad_0 = const()[name = string("attention_18_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_18_qkvproj_dilations_0 = const()[name = string("attention_18_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_18_qkvproj_groups_0 = const()[name = string("attention_18_qkvproj_groups_0"), val = int32(1)]; tensor attention_18_qkvproj = conv(bias = attention_18_qkvproj_bias_0, dilations = attention_18_qkvproj_dilations_0, groups = attention_18_qkvproj_groups_0, pad = attention_18_qkvproj_pad_0, pad_type = attention_18_qkvproj_pad_type_0, strides = attention_18_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_85, x = block_18_attention_rmsnorm)[name = string("attention_18_qkvproj")]; tensor attention_18_head_reshape_shape_0 = const()[name = string("attention_18_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_18_head_reshape = reshape(shape = attention_18_head_reshape_shape_0, x = attention_18_qkvproj)[name = string("attention_18_head_reshape")]; tensor attention_18_head_transpose_perm_0 = const()[name = string("attention_18_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_18_split_qkv_heads_axis_0 = const()[name = string("attention_18_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_18_split_qkv_heads_split_sizes_0 = const()[name = string("attention_18_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_18_head_transpose = transpose(perm = attention_18_head_transpose_perm_0, x = attention_18_head_reshape)[name = string("transpose_12")]; tensor attention_18_split_qkv_heads_0, tensor attention_18_split_qkv_heads_1, tensor attention_18_split_qkv_heads_2 = split(axis = attention_18_split_qkv_heads_axis_0, split_sizes = 
attention_18_split_qkv_heads_split_sizes_0, x = attention_18_head_transpose)[name = string("attention_18_split_qkv_heads")]; tensor attention_18_q_rope_lhs_mult = mul(x = attention_18_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_18_q_rope_lhs_mult")]; int32 attention_18_q_rotate_half_split_num_splits_0 = const()[name = string("attention_18_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_18_q_rotate_half_split_axis_0 = const()[name = string("attention_18_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_18_q_rotate_half_split_0, tensor attention_18_q_rotate_half_split_1 = split(axis = attention_18_q_rotate_half_split_axis_0, num_splits = attention_18_q_rotate_half_split_num_splits_0, x = attention_18_split_qkv_heads_0)[name = string("attention_18_q_rotate_half_split")]; fp16 attention_18_q_rotate_half_neg_y_0 = const()[name = string("attention_18_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_18_q_rotate_half_neg = mul(x = attention_18_q_rotate_half_split_1, y = attention_18_q_rotate_half_neg_y_0)[name = string("attention_18_q_rotate_half_neg")]; int32 attention_18_q_rotate_half_concat_axis_0 = const()[name = string("attention_18_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_18_q_rotate_half_concat_interleave_0 = const()[name = string("attention_18_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_18_q_rotate_half_concat = concat(axis = attention_18_q_rotate_half_concat_axis_0, interleave = attention_18_q_rotate_half_concat_interleave_0, values = (attention_18_q_rotate_half_neg, attention_18_q_rotate_half_split_0))[name = string("attention_18_q_rotate_half_concat")]; tensor attention_18_q_rope_rhs_mult = mul(x = attention_18_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_18_q_rope_rhs_mult")]; tensor attention_18_q_rope = add(x = attention_18_q_rope_lhs_mult, y = attention_18_q_rope_rhs_mult)[name = string("attention_18_q_rope")]; tensor attention_18_k_rope_lhs_mult = mul(x = attention_18_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_18_k_rope_lhs_mult")]; int32 attention_18_k_rotate_half_split_num_splits_0 = const()[name = string("attention_18_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_18_k_rotate_half_split_axis_0 = const()[name = string("attention_18_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_18_k_rotate_half_split_0, tensor attention_18_k_rotate_half_split_1 = split(axis = attention_18_k_rotate_half_split_axis_0, num_splits = attention_18_k_rotate_half_split_num_splits_0, x = attention_18_split_qkv_heads_1)[name = string("attention_18_k_rotate_half_split")]; fp16 attention_18_k_rotate_half_neg_y_0 = const()[name = string("attention_18_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_18_k_rotate_half_neg = mul(x = attention_18_k_rotate_half_split_1, y = attention_18_k_rotate_half_neg_y_0)[name = string("attention_18_k_rotate_half_neg")]; int32 attention_18_k_rotate_half_concat_axis_0 = const()[name = string("attention_18_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_18_k_rotate_half_concat_interleave_0 = const()[name = string("attention_18_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_18_k_rotate_half_concat = concat(axis = attention_18_k_rotate_half_concat_axis_0, interleave = attention_18_k_rotate_half_concat_interleave_0, values = (attention_18_k_rotate_half_neg, attention_18_k_rotate_half_split_0))[name = 
string("attention_18_k_rotate_half_concat")]; tensor attention_18_k_rope_rhs_mult = mul(x = attention_18_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_18_k_rope_rhs_mult")]; tensor attention_18_k_rope = add(x = attention_18_k_rope_lhs_mult, y = attention_18_k_rope_rhs_mult)[name = string("attention_18_k_rope")]; int32 attention_18_q_splits_axis_0 = const()[name = string("attention_18_q_splits_axis_0"), val = int32(1)]; int32 attention_18_q_splits_num_splits_0 = const()[name = string("attention_18_q_splits_num_splits_0"), val = int32(2)]; tensor attention_18_q_splits_0, tensor attention_18_q_splits_1 = split(axis = attention_18_q_splits_axis_0, num_splits = attention_18_q_splits_num_splits_0, x = attention_18_q_rope)[name = string("attention_18_q_splits")]; tensor attention_18_update_begin_0_values0_0 = const()[name = string("attention_18_update_begin_0_values0_0"), val = tensor([18])]; tensor attention_18_update_begin_0_values1_0 = const()[name = string("attention_18_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_18_update_begin_0_values3_0 = const()[name = string("attention_18_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_18_update_begin_0_axis_0 = const()[name = string("attention_18_update_begin_0_axis_0"), val = int32(0)]; bool attention_18_update_begin_0_interleave_0 = const()[name = string("attention_18_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_18_update_begin_0 = concat(axis = attention_18_update_begin_0_axis_0, interleave = attention_18_update_begin_0_interleave_0, values = (attention_18_update_begin_0_values0_0, attention_18_update_begin_0_values1_0, query_pos1, attention_18_update_begin_0_values3_0))[name = string("attention_18_update_begin_0")]; tensor attention_18_update_end_0_values0_0 = const()[name = string("attention_18_update_end_0_values0_0"), val = tensor([19])]; tensor attention_18_update_end_0_values1_0 = const()[name = string("attention_18_update_end_0_values1_0"), val = tensor([2])]; tensor attention_18_update_end_0_values3_0 = const()[name = string("attention_18_update_end_0_values3_0"), val = tensor([64])]; int32 attention_18_update_end_0_axis_0 = const()[name = string("attention_18_update_end_0_axis_0"), val = int32(0)]; bool attention_18_update_end_0_interleave_0 = const()[name = string("attention_18_update_end_0_interleave_0"), val = bool(false)]; tensor attention_18_update_end_0 = concat(axis = attention_18_update_end_0_axis_0, interleave = attention_18_update_end_0_interleave_0, values = (attention_18_update_end_0_values0_0, attention_18_update_end_0_values1_0, end_pos_0, attention_18_update_end_0_values3_0))[name = string("attention_18_update_end_0")]; tensor attention_18_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_18_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_18_updated_key_cache_0 = slice_update(begin = attention_18_update_begin_0, end = attention_18_update_end_0, squeeze_mask = attention_18_updated_key_cache_0_squeeze_mask_0, update = attention_18_k_rope, x = coreml_update_state_34)[name = string("attention_18_updated_key_cache_0")]; write_state(data = attention_18_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_84_write_state")]; tensor coreml_update_state_36 = read_state(input = key_cache_state)[name = string("coreml_update_state_84")]; tensor attention_18_key_cache_begin_0 = const()[name = string("attention_18_key_cache_begin_0"), val = tensor([18, 0, 0, 0])]; 
tensor attention_18_key_cache_end_0 = const()[name = string("attention_18_key_cache_end_0"), val = tensor([19, 2, 512, 64])]; tensor attention_18_key_cache_squeeze_mask_0 = const()[name = string("attention_18_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_18_key_cache = slice_by_index(begin = attention_18_key_cache_begin_0, end = attention_18_key_cache_end_0, squeeze_mask = attention_18_key_cache_squeeze_mask_0, x = coreml_update_state_36)[name = string("attention_18_key_cache")]; int32 attention_18_key_cache_head_axis_0 = const()[name = string("attention_18_key_cache_head_axis_0"), val = int32(1)]; int32 attention_18_key_cache_head_num_splits_0 = const()[name = string("attention_18_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_18_key_cache_head_0, tensor attention_18_key_cache_head_1 = split(axis = attention_18_key_cache_head_axis_0, num_splits = attention_18_key_cache_head_num_splits_0, x = attention_18_key_cache)[name = string("attention_18_key_cache_head")]; tensor attention_18_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_18_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_18_updated_value_cache_0 = slice_update(begin = attention_18_update_begin_0, end = attention_18_update_end_0, squeeze_mask = attention_18_updated_value_cache_0_squeeze_mask_0, update = attention_18_split_qkv_heads_2, x = coreml_update_state_35)[name = string("attention_18_updated_value_cache_0")]; write_state(data = attention_18_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_85_write_state")]; tensor coreml_update_state_37 = read_state(input = value_cache_state)[name = string("coreml_update_state_85")]; tensor attention_18_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_18_slice_current_layer_value_cache_begin_0"), val = tensor([18, 0, 0, 0])]; tensor attention_18_slice_current_layer_value_cache_end_0 = const()[name = string("attention_18_slice_current_layer_value_cache_end_0"), val = tensor([19, 2, 512, 64])]; tensor attention_18_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_18_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_18_slice_current_layer_value_cache = slice_by_index(begin = attention_18_slice_current_layer_value_cache_begin_0, end = attention_18_slice_current_layer_value_cache_end_0, squeeze_mask = attention_18_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_37)[name = string("attention_18_slice_current_layer_value_cache")]; int32 attention_18_slice_value_cache_heads_axis_0 = const()[name = string("attention_18_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_18_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_18_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_18_slice_value_cache_heads_0, tensor attention_18_slice_value_cache_heads_1 = split(axis = attention_18_slice_value_cache_heads_axis_0, num_splits = attention_18_slice_value_cache_heads_num_splits_0, x = attention_18_slice_current_layer_value_cache)[name = string("attention_18_slice_value_cache_heads")]; bool attention_18_scores_0_transpose_y_0 = const()[name = string("attention_18_scores_0_transpose_y_0"), val = bool(true)]; bool attention_18_scores_0_transpose_x_0 = const()[name = string("attention_18_scores_0_transpose_x_0"), val = bool(false)]; tensor 
attention_18_scores_0 = matmul(transpose_x = attention_18_scores_0_transpose_x_0, transpose_y = attention_18_scores_0_transpose_y_0, x = attention_18_key_cache_head_0, y = attention_18_q_splits_0)[name = string("attention_18_scores_0")]; fp16 attention_18_scaled_scores_0_y_0 = const()[name = string("attention_18_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_18_scaled_scores_0 = mul(x = attention_18_scores_0, y = attention_18_scaled_scores_0_y_0)[name = string("attention_18_scaled_scores_0")]; tensor attention_18_masked_scaled_scores_0 = add(x = attention_18_scaled_scores_0, y = transpose_0)[name = string("attention_18_masked_scaled_scores_0")]; int32 softmax_36_axis_0 = const()[name = string("softmax_36_axis_0"), val = int32(-2)]; tensor softmax_36 = softmax(axis = softmax_36_axis_0, x = attention_18_masked_scaled_scores_0)[name = string("softmax_36")]; bool attention_18_attention_0_transpose_x_0 = const()[name = string("attention_18_attention_0_transpose_x_0"), val = bool(true)]; bool attention_18_attention_0_transpose_y_0 = const()[name = string("attention_18_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_18_attention_0 = matmul(transpose_x = attention_18_attention_0_transpose_x_0, transpose_y = attention_18_attention_0_transpose_y_0, x = softmax_36, y = attention_18_slice_value_cache_heads_0)[name = string("attention_18_attention_0")]; bool attention_18_scores_1_transpose_y_0 = const()[name = string("attention_18_scores_1_transpose_y_0"), val = bool(true)]; bool attention_18_scores_1_transpose_x_0 = const()[name = string("attention_18_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_18_scores_1 = matmul(transpose_x = attention_18_scores_1_transpose_x_0, transpose_y = attention_18_scores_1_transpose_y_0, x = attention_18_key_cache_head_1, y = attention_18_q_splits_1)[name = string("attention_18_scores_1")]; fp16 attention_18_scaled_scores_1_y_0 = const()[name = string("attention_18_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_18_scaled_scores_1 = mul(x = attention_18_scores_1, y = attention_18_scaled_scores_1_y_0)[name = string("attention_18_scaled_scores_1")]; tensor attention_18_masked_scaled_scores_1 = add(x = attention_18_scaled_scores_1, y = transpose_0)[name = string("attention_18_masked_scaled_scores_1")]; int32 softmax_37_axis_0 = const()[name = string("softmax_37_axis_0"), val = int32(-2)]; tensor softmax_37 = softmax(axis = softmax_37_axis_0, x = attention_18_masked_scaled_scores_1)[name = string("softmax_37")]; bool attention_18_attention_1_transpose_x_0 = const()[name = string("attention_18_attention_1_transpose_x_0"), val = bool(true)]; bool attention_18_attention_1_transpose_y_0 = const()[name = string("attention_18_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_18_attention_1 = matmul(transpose_x = attention_18_attention_1_transpose_x_0, transpose_y = attention_18_attention_1_transpose_y_0, x = softmax_37, y = attention_18_slice_value_cache_heads_1)[name = string("attention_18_attention_1")]; int32 attention_18_concat_attention_all_heads_axis_0 = const()[name = string("attention_18_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_18_concat_attention_all_heads_interleave_0 = const()[name = string("attention_18_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_18_concat_attention_all_heads = concat(axis = attention_18_concat_attention_all_heads_axis_0, interleave = attention_18_concat_attention_all_heads_interleave_0, values = 
(attention_18_attention_0, attention_18_attention_1))[name = string("attention_18_concat_attention_all_heads")]; tensor attention_18_channels_first_retransposed_perm_0 = const()[name = string("attention_18_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_18_reshaped_shape_0 = const()[name = string("attention_18_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_18_channels_first_retransposed = transpose(perm = attention_18_channels_first_retransposed_perm_0, x = attention_18_concat_attention_all_heads)[name = string("transpose_11")]; tensor attention_18_reshaped = reshape(shape = attention_18_reshaped_shape_0, x = attention_18_channels_first_retransposed)[name = string("attention_18_reshaped")]; tensor attention_18_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501107456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501709632))))[name = string("attention_18_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_86 = constexpr_blockwise_shift_scale(data = attention_18_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501738368))))[name = string("constexpr_blockwise_shift_scale_86")]; tensor attention_18_outproj_strides_0 = const()[name = string("attention_18_outproj_strides_0"), val = tensor([1])]; string attention_18_outproj_pad_type_0 = const()[name = string("attention_18_outproj_pad_type_0"), val = string("valid")]; tensor attention_18_outproj_pad_0 = const()[name = string("attention_18_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_18_outproj_dilations_0 = const()[name = string("attention_18_outproj_dilations_0"), val = tensor([1])]; int32 attention_18_outproj_groups_0 = const()[name = string("attention_18_outproj_groups_0"), val = int32(1)]; tensor attention_18_outproj = conv(dilations = attention_18_outproj_dilations_0, groups = attention_18_outproj_groups_0, pad = attention_18_outproj_pad_0, pad_type = attention_18_outproj_pad_type_0, strides = attention_18_outproj_strides_0, weight = constexpr_blockwise_shift_scale_86, x = attention_18_reshaped)[name = string("attention_18_outproj")]; tensor block_18_residual_1 = add(x = block_17_residual_2, y = attention_18_outproj)[name = string("block_18_residual_1")]; tensor block_18_ffn_rmsnorm_abs = abs(x = block_18_residual_1)[name = string("block_18_ffn_rmsnorm_abs")]; tensor block_18_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_18_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_18_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_18_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_18_ffn_rmsnorm_maxval = reduce_max(axes = block_18_ffn_rmsnorm_maxval_axes_0, keep_dims = block_18_ffn_rmsnorm_maxval_keep_dims_0, x = block_18_ffn_rmsnorm_abs)[name = string("block_18_ffn_rmsnorm_maxval")]; fp16 block_18_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_18_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_18_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_18_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_18_ffn_rmsnorm_maxval_clipped = clip(alpha = block_18_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_18_ffn_rmsnorm_maxval_clipped_beta_0, x = block_18_ffn_rmsnorm_maxval)[name = string("block_18_ffn_rmsnorm_maxval_clipped")]; 
tensor block_18_ffn_rmsnorm_scaled = real_div(x = block_18_residual_1, y = block_18_ffn_rmsnorm_maxval_clipped)[name = string("block_18_ffn_rmsnorm_scaled")]; tensor block_18_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_18_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_18_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_18_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_18_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_18_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_18_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_18_ffn_rmsnorm_scaled)[name = string("block_18_ffn_rmsnorm_squared_sum")]; fp16 block_18_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_18_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_18_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_18_ffn_rmsnorm_rsqrt_epsilon_0, x = block_18_ffn_rmsnorm_squared_sum)[name = string("block_18_ffn_rmsnorm_rsqrt")]; fp16 block_18_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_18_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_18_ffn_rmsnorm_dim_scaled = mul(x = block_18_ffn_rmsnorm_scaled, y = block_18_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_18_ffn_rmsnorm_dim_scaled")]; tensor block_18_ffn_rmsnorm_normalized = mul(x = block_18_ffn_rmsnorm_dim_scaled, y = block_18_ffn_rmsnorm_rsqrt)[name = string("block_18_ffn_rmsnorm_normalized")]; tensor block_18_ffn_rmsnorm_y_0 = const()[name = string("block_18_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501740224)))]; tensor block_18_ffn_rmsnorm = mul(x = block_18_ffn_rmsnorm_normalized, y = block_18_ffn_rmsnorm_y_0)[name = string("block_18_ffn_rmsnorm")]; tensor block_18_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(501742080))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505010752))))[name = string("block_18_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_87 = constexpr_blockwise_shift_scale(data = block_18_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505166464))))[name = string("constexpr_blockwise_shift_scale_87")]; tensor block_18_ffn_inproj_strides_0 = const()[name = string("block_18_ffn_inproj_strides_0"), val = tensor([1])]; string block_18_ffn_inproj_pad_type_0 = const()[name = string("block_18_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_18_ffn_inproj_pad_0 = const()[name = string("block_18_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_18_ffn_inproj_dilations_0 = const()[name = string("block_18_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_18_ffn_inproj_groups_0 = const()[name = string("block_18_ffn_inproj_groups_0"), val = int32(1)]; tensor block_18_ffn_inproj = conv(dilations = block_18_ffn_inproj_dilations_0, groups = block_18_ffn_inproj_groups_0, pad = block_18_ffn_inproj_pad_0, pad_type = block_18_ffn_inproj_pad_type_0, strides = block_18_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_87, x = block_18_ffn_rmsnorm)[name = string("block_18_ffn_inproj")]; tensor block_18_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505176256))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(508444928))))[name = string("block_18_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_88 = constexpr_blockwise_shift_scale(data = block_18_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508600640))))[name = string("constexpr_blockwise_shift_scale_88")]; tensor block_18_ffn_g_strides_0 = const()[name = string("block_18_ffn_g_strides_0"), val = tensor([1])]; string block_18_ffn_g_pad_type_0 = const()[name = string("block_18_ffn_g_pad_type_0"), val = string("valid")]; tensor block_18_ffn_g_pad_0 = const()[name = string("block_18_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_18_ffn_g_dilations_0 = const()[name = string("block_18_ffn_g_dilations_0"), val = tensor([1])]; int32 block_18_ffn_g_groups_0 = const()[name = string("block_18_ffn_g_groups_0"), val = int32(1)]; tensor block_18_ffn_g = conv(dilations = block_18_ffn_g_dilations_0, groups = block_18_ffn_g_groups_0, pad = block_18_ffn_g_pad_0, pad_type = block_18_ffn_g_pad_type_0, strides = block_18_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_88, x = block_18_ffn_rmsnorm)[name = string("block_18_ffn_g")]; tensor block_18_ffn_g_activation = silu(x = block_18_ffn_g)[name = string("block_18_ffn_g_activation")]; tensor block_18_ffn_x_gated = mul(x = block_18_ffn_inproj, y = block_18_ffn_g_activation)[name = string("block_18_ffn_x_gated")]; tensor block_18_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508610432))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511879104))))[name = string("block_18_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_89 = constexpr_blockwise_shift_scale(data = block_18_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511907840))))[name = string("constexpr_blockwise_shift_scale_89")]; tensor block_18_ffn_outproj_strides_0 = const()[name = string("block_18_ffn_outproj_strides_0"), val = tensor([1])]; string block_18_ffn_outproj_pad_type_0 = const()[name = string("block_18_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_18_ffn_outproj_pad_0 = const()[name = string("block_18_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_18_ffn_outproj_dilations_0 = const()[name = string("block_18_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_18_ffn_outproj_groups_0 = const()[name = string("block_18_ffn_outproj_groups_0"), val = int32(1)]; tensor block_18_ffn_outproj = conv(dilations = block_18_ffn_outproj_dilations_0, groups = block_18_ffn_outproj_groups_0, pad = block_18_ffn_outproj_pad_0, pad_type = block_18_ffn_outproj_pad_type_0, strides = block_18_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_89, x = block_18_ffn_x_gated)[name = string("block_18_ffn_outproj")]; tensor block_18_residual_2 = add(x = block_18_ffn_outproj, y = block_18_residual_1)[name = string("block_18_residual_2")]; tensor block_19_attention_rmsnorm_abs = abs(x = block_18_residual_2)[name = string("block_19_attention_rmsnorm_abs")]; tensor block_19_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_19_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_19_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_19_attention_rmsnorm_maxval_keep_dims_0"), val = 
bool(true)]; tensor block_19_attention_rmsnorm_maxval = reduce_max(axes = block_19_attention_rmsnorm_maxval_axes_0, keep_dims = block_19_attention_rmsnorm_maxval_keep_dims_0, x = block_19_attention_rmsnorm_abs)[name = string("block_19_attention_rmsnorm_maxval")]; fp16 block_19_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_19_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_19_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_19_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_19_attention_rmsnorm_maxval_clipped = clip(alpha = block_19_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_19_attention_rmsnorm_maxval_clipped_beta_0, x = block_19_attention_rmsnorm_maxval)[name = string("block_19_attention_rmsnorm_maxval_clipped")]; tensor block_19_attention_rmsnorm_scaled = real_div(x = block_18_residual_2, y = block_19_attention_rmsnorm_maxval_clipped)[name = string("block_19_attention_rmsnorm_scaled")]; tensor block_19_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_19_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_19_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_19_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_19_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_19_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_19_attention_rmsnorm_squared_sum_keep_dims_0, x = block_19_attention_rmsnorm_scaled)[name = string("block_19_attention_rmsnorm_squared_sum")]; fp16 block_19_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_19_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_19_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_19_attention_rmsnorm_rsqrt_epsilon_0, x = block_19_attention_rmsnorm_squared_sum)[name = string("block_19_attention_rmsnorm_rsqrt")]; fp16 block_19_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_19_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_19_attention_rmsnorm_dim_scaled = mul(x = block_19_attention_rmsnorm_scaled, y = block_19_attention_rmsnorm_dim_scaled_y_0)[name = string("block_19_attention_rmsnorm_dim_scaled")]; tensor block_19_attention_rmsnorm_normalized = mul(x = block_19_attention_rmsnorm_dim_scaled, y = block_19_attention_rmsnorm_rsqrt)[name = string("block_19_attention_rmsnorm_normalized")]; tensor block_19_attention_rmsnorm_y_0 = const()[name = string("block_19_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511909696)))]; tensor block_19_attention_rmsnorm = mul(x = block_19_attention_rmsnorm_normalized, y = block_19_attention_rmsnorm_y_0)[name = string("block_19_attention_rmsnorm")]; tensor attention_19_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(511911552))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512685760))))[name = string("attention_19_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_90 = constexpr_blockwise_shift_scale(data = attention_19_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512722688))))[name = string("constexpr_blockwise_shift_scale_90")]; tensor attention_19_qkvproj_bias_0 = const()[name = string("attention_19_qkvproj_bias_0"), val = 
tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512725056)))]; tensor attention_19_qkvproj_strides_0 = const()[name = string("attention_19_qkvproj_strides_0"), val = tensor([1])]; string attention_19_qkvproj_pad_type_0 = const()[name = string("attention_19_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_19_qkvproj_pad_0 = const()[name = string("attention_19_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_19_qkvproj_dilations_0 = const()[name = string("attention_19_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_19_qkvproj_groups_0 = const()[name = string("attention_19_qkvproj_groups_0"), val = int32(1)]; tensor attention_19_qkvproj = conv(bias = attention_19_qkvproj_bias_0, dilations = attention_19_qkvproj_dilations_0, groups = attention_19_qkvproj_groups_0, pad = attention_19_qkvproj_pad_0, pad_type = attention_19_qkvproj_pad_type_0, strides = attention_19_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_90, x = block_19_attention_rmsnorm)[name = string("attention_19_qkvproj")]; tensor attention_19_head_reshape_shape_0 = const()[name = string("attention_19_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_19_head_reshape = reshape(shape = attention_19_head_reshape_shape_0, x = attention_19_qkvproj)[name = string("attention_19_head_reshape")]; tensor attention_19_head_transpose_perm_0 = const()[name = string("attention_19_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_19_split_qkv_heads_axis_0 = const()[name = string("attention_19_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_19_split_qkv_heads_split_sizes_0 = const()[name = string("attention_19_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_19_head_transpose = transpose(perm = attention_19_head_transpose_perm_0, x = attention_19_head_reshape)[name = string("transpose_10")]; tensor attention_19_split_qkv_heads_0, tensor attention_19_split_qkv_heads_1, tensor attention_19_split_qkv_heads_2 = split(axis = attention_19_split_qkv_heads_axis_0, split_sizes = attention_19_split_qkv_heads_split_sizes_0, x = attention_19_head_transpose)[name = string("attention_19_split_qkv_heads")]; tensor attention_19_q_rope_lhs_mult = mul(x = attention_19_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_19_q_rope_lhs_mult")]; int32 attention_19_q_rotate_half_split_num_splits_0 = const()[name = string("attention_19_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_19_q_rotate_half_split_axis_0 = const()[name = string("attention_19_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_19_q_rotate_half_split_0, tensor attention_19_q_rotate_half_split_1 = split(axis = attention_19_q_rotate_half_split_axis_0, num_splits = attention_19_q_rotate_half_split_num_splits_0, x = attention_19_split_qkv_heads_0)[name = string("attention_19_q_rotate_half_split")]; fp16 attention_19_q_rotate_half_neg_y_0 = const()[name = string("attention_19_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_19_q_rotate_half_neg = mul(x = attention_19_q_rotate_half_split_1, y = attention_19_q_rotate_half_neg_y_0)[name = string("attention_19_q_rotate_half_neg")]; int32 attention_19_q_rotate_half_concat_axis_0 = const()[name = string("attention_19_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_19_q_rotate_half_concat_interleave_0 = const()[name = string("attention_19_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor 
attention_19_q_rotate_half_concat = concat(axis = attention_19_q_rotate_half_concat_axis_0, interleave = attention_19_q_rotate_half_concat_interleave_0, values = (attention_19_q_rotate_half_neg, attention_19_q_rotate_half_split_0))[name = string("attention_19_q_rotate_half_concat")]; tensor attention_19_q_rope_rhs_mult = mul(x = attention_19_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_19_q_rope_rhs_mult")]; tensor attention_19_q_rope = add(x = attention_19_q_rope_lhs_mult, y = attention_19_q_rope_rhs_mult)[name = string("attention_19_q_rope")]; tensor attention_19_k_rope_lhs_mult = mul(x = attention_19_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_19_k_rope_lhs_mult")]; int32 attention_19_k_rotate_half_split_num_splits_0 = const()[name = string("attention_19_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_19_k_rotate_half_split_axis_0 = const()[name = string("attention_19_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_19_k_rotate_half_split_0, tensor attention_19_k_rotate_half_split_1 = split(axis = attention_19_k_rotate_half_split_axis_0, num_splits = attention_19_k_rotate_half_split_num_splits_0, x = attention_19_split_qkv_heads_1)[name = string("attention_19_k_rotate_half_split")]; fp16 attention_19_k_rotate_half_neg_y_0 = const()[name = string("attention_19_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_19_k_rotate_half_neg = mul(x = attention_19_k_rotate_half_split_1, y = attention_19_k_rotate_half_neg_y_0)[name = string("attention_19_k_rotate_half_neg")]; int32 attention_19_k_rotate_half_concat_axis_0 = const()[name = string("attention_19_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_19_k_rotate_half_concat_interleave_0 = const()[name = string("attention_19_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_19_k_rotate_half_concat = concat(axis = attention_19_k_rotate_half_concat_axis_0, interleave = attention_19_k_rotate_half_concat_interleave_0, values = (attention_19_k_rotate_half_neg, attention_19_k_rotate_half_split_0))[name = string("attention_19_k_rotate_half_concat")]; tensor attention_19_k_rope_rhs_mult = mul(x = attention_19_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_19_k_rope_rhs_mult")]; tensor attention_19_k_rope = add(x = attention_19_k_rope_lhs_mult, y = attention_19_k_rope_rhs_mult)[name = string("attention_19_k_rope")]; int32 attention_19_q_splits_axis_0 = const()[name = string("attention_19_q_splits_axis_0"), val = int32(1)]; int32 attention_19_q_splits_num_splits_0 = const()[name = string("attention_19_q_splits_num_splits_0"), val = int32(2)]; tensor attention_19_q_splits_0, tensor attention_19_q_splits_1 = split(axis = attention_19_q_splits_axis_0, num_splits = attention_19_q_splits_num_splits_0, x = attention_19_q_rope)[name = string("attention_19_q_splits")]; tensor attention_19_update_begin_0_values0_0 = const()[name = string("attention_19_update_begin_0_values0_0"), val = tensor([19])]; tensor attention_19_update_begin_0_values1_0 = const()[name = string("attention_19_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_19_update_begin_0_values3_0 = const()[name = string("attention_19_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_19_update_begin_0_axis_0 = const()[name = string("attention_19_update_begin_0_axis_0"), val = int32(0)]; bool attention_19_update_begin_0_interleave_0 = const()[name = string("attention_19_update_begin_0_interleave_0"), val = bool(false)]; tensor 
attention_19_update_begin_0 = concat(axis = attention_19_update_begin_0_axis_0, interleave = attention_19_update_begin_0_interleave_0, values = (attention_19_update_begin_0_values0_0, attention_19_update_begin_0_values1_0, query_pos1, attention_19_update_begin_0_values3_0))[name = string("attention_19_update_begin_0")]; tensor attention_19_update_end_0_values0_0 = const()[name = string("attention_19_update_end_0_values0_0"), val = tensor([20])]; tensor attention_19_update_end_0_values1_0 = const()[name = string("attention_19_update_end_0_values1_0"), val = tensor([2])]; tensor attention_19_update_end_0_values3_0 = const()[name = string("attention_19_update_end_0_values3_0"), val = tensor([64])]; int32 attention_19_update_end_0_axis_0 = const()[name = string("attention_19_update_end_0_axis_0"), val = int32(0)]; bool attention_19_update_end_0_interleave_0 = const()[name = string("attention_19_update_end_0_interleave_0"), val = bool(false)]; tensor attention_19_update_end_0 = concat(axis = attention_19_update_end_0_axis_0, interleave = attention_19_update_end_0_interleave_0, values = (attention_19_update_end_0_values0_0, attention_19_update_end_0_values1_0, end_pos_0, attention_19_update_end_0_values3_0))[name = string("attention_19_update_end_0")]; tensor attention_19_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_19_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_19_updated_key_cache_0 = slice_update(begin = attention_19_update_begin_0, end = attention_19_update_end_0, squeeze_mask = attention_19_updated_key_cache_0_squeeze_mask_0, update = attention_19_k_rope, x = coreml_update_state_36)[name = string("attention_19_updated_key_cache_0")]; write_state(data = attention_19_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_86_write_state")]; tensor coreml_update_state_38 = read_state(input = key_cache_state)[name = string("coreml_update_state_86")]; tensor attention_19_key_cache_begin_0 = const()[name = string("attention_19_key_cache_begin_0"), val = tensor([19, 0, 0, 0])]; tensor attention_19_key_cache_end_0 = const()[name = string("attention_19_key_cache_end_0"), val = tensor([20, 2, 512, 64])]; tensor attention_19_key_cache_squeeze_mask_0 = const()[name = string("attention_19_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_19_key_cache = slice_by_index(begin = attention_19_key_cache_begin_0, end = attention_19_key_cache_end_0, squeeze_mask = attention_19_key_cache_squeeze_mask_0, x = coreml_update_state_38)[name = string("attention_19_key_cache")]; int32 attention_19_key_cache_head_axis_0 = const()[name = string("attention_19_key_cache_head_axis_0"), val = int32(1)]; int32 attention_19_key_cache_head_num_splits_0 = const()[name = string("attention_19_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_19_key_cache_head_0, tensor attention_19_key_cache_head_1 = split(axis = attention_19_key_cache_head_axis_0, num_splits = attention_19_key_cache_head_num_splits_0, x = attention_19_key_cache)[name = string("attention_19_key_cache_head")]; tensor attention_19_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_19_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_19_updated_value_cache_0 = slice_update(begin = attention_19_update_begin_0, end = attention_19_update_end_0, squeeze_mask = attention_19_updated_value_cache_0_squeeze_mask_0, update = 
attention_19_split_qkv_heads_2, x = coreml_update_state_37)[name = string("attention_19_updated_value_cache_0")]; write_state(data = attention_19_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_87_write_state")]; tensor coreml_update_state_39 = read_state(input = value_cache_state)[name = string("coreml_update_state_87")]; tensor attention_19_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_19_slice_current_layer_value_cache_begin_0"), val = tensor([19, 0, 0, 0])]; tensor attention_19_slice_current_layer_value_cache_end_0 = const()[name = string("attention_19_slice_current_layer_value_cache_end_0"), val = tensor([20, 2, 512, 64])]; tensor attention_19_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_19_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_19_slice_current_layer_value_cache = slice_by_index(begin = attention_19_slice_current_layer_value_cache_begin_0, end = attention_19_slice_current_layer_value_cache_end_0, squeeze_mask = attention_19_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_39)[name = string("attention_19_slice_current_layer_value_cache")]; int32 attention_19_slice_value_cache_heads_axis_0 = const()[name = string("attention_19_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_19_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_19_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_19_slice_value_cache_heads_0, tensor attention_19_slice_value_cache_heads_1 = split(axis = attention_19_slice_value_cache_heads_axis_0, num_splits = attention_19_slice_value_cache_heads_num_splits_0, x = attention_19_slice_current_layer_value_cache)[name = string("attention_19_slice_value_cache_heads")]; bool attention_19_scores_0_transpose_y_0 = const()[name = string("attention_19_scores_0_transpose_y_0"), val = bool(true)]; bool attention_19_scores_0_transpose_x_0 = const()[name = string("attention_19_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_19_scores_0 = matmul(transpose_x = attention_19_scores_0_transpose_x_0, transpose_y = attention_19_scores_0_transpose_y_0, x = attention_19_key_cache_head_0, y = attention_19_q_splits_0)[name = string("attention_19_scores_0")]; fp16 attention_19_scaled_scores_0_y_0 = const()[name = string("attention_19_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_19_scaled_scores_0 = mul(x = attention_19_scores_0, y = attention_19_scaled_scores_0_y_0)[name = string("attention_19_scaled_scores_0")]; tensor attention_19_masked_scaled_scores_0 = add(x = attention_19_scaled_scores_0, y = transpose_0)[name = string("attention_19_masked_scaled_scores_0")]; int32 softmax_38_axis_0 = const()[name = string("softmax_38_axis_0"), val = int32(-2)]; tensor softmax_38 = softmax(axis = softmax_38_axis_0, x = attention_19_masked_scaled_scores_0)[name = string("softmax_38")]; bool attention_19_attention_0_transpose_x_0 = const()[name = string("attention_19_attention_0_transpose_x_0"), val = bool(true)]; bool attention_19_attention_0_transpose_y_0 = const()[name = string("attention_19_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_19_attention_0 = matmul(transpose_x = attention_19_attention_0_transpose_x_0, transpose_y = attention_19_attention_0_transpose_y_0, x = softmax_38, y = attention_19_slice_value_cache_heads_0)[name = string("attention_19_attention_0")]; bool 
attention_19_scores_1_transpose_y_0 = const()[name = string("attention_19_scores_1_transpose_y_0"), val = bool(true)]; bool attention_19_scores_1_transpose_x_0 = const()[name = string("attention_19_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_19_scores_1 = matmul(transpose_x = attention_19_scores_1_transpose_x_0, transpose_y = attention_19_scores_1_transpose_y_0, x = attention_19_key_cache_head_1, y = attention_19_q_splits_1)[name = string("attention_19_scores_1")]; fp16 attention_19_scaled_scores_1_y_0 = const()[name = string("attention_19_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_19_scaled_scores_1 = mul(x = attention_19_scores_1, y = attention_19_scaled_scores_1_y_0)[name = string("attention_19_scaled_scores_1")]; tensor attention_19_masked_scaled_scores_1 = add(x = attention_19_scaled_scores_1, y = transpose_0)[name = string("attention_19_masked_scaled_scores_1")]; int32 softmax_39_axis_0 = const()[name = string("softmax_39_axis_0"), val = int32(-2)]; tensor softmax_39 = softmax(axis = softmax_39_axis_0, x = attention_19_masked_scaled_scores_1)[name = string("softmax_39")]; bool attention_19_attention_1_transpose_x_0 = const()[name = string("attention_19_attention_1_transpose_x_0"), val = bool(true)]; bool attention_19_attention_1_transpose_y_0 = const()[name = string("attention_19_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_19_attention_1 = matmul(transpose_x = attention_19_attention_1_transpose_x_0, transpose_y = attention_19_attention_1_transpose_y_0, x = softmax_39, y = attention_19_slice_value_cache_heads_1)[name = string("attention_19_attention_1")]; int32 attention_19_concat_attention_all_heads_axis_0 = const()[name = string("attention_19_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_19_concat_attention_all_heads_interleave_0 = const()[name = string("attention_19_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_19_concat_attention_all_heads = concat(axis = attention_19_concat_attention_all_heads_axis_0, interleave = attention_19_concat_attention_all_heads_interleave_0, values = (attention_19_attention_0, attention_19_attention_1))[name = string("attention_19_concat_attention_all_heads")]; tensor attention_19_channels_first_retransposed_perm_0 = const()[name = string("attention_19_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_19_reshaped_shape_0 = const()[name = string("attention_19_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_19_channels_first_retransposed = transpose(perm = attention_19_channels_first_retransposed_perm_0, x = attention_19_concat_attention_all_heads)[name = string("transpose_9")]; tensor attention_19_reshaped = reshape(shape = attention_19_reshaped_shape_0, x = attention_19_channels_first_retransposed)[name = string("attention_19_reshaped")]; tensor attention_19_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512727424))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513329600))))[name = string("attention_19_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_91 = constexpr_blockwise_shift_scale(data = attention_19_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513358336))))[name = string("constexpr_blockwise_shift_scale_91")]; tensor 
attention_19_outproj_strides_0 = const()[name = string("attention_19_outproj_strides_0"), val = tensor([1])]; string attention_19_outproj_pad_type_0 = const()[name = string("attention_19_outproj_pad_type_0"), val = string("valid")]; tensor attention_19_outproj_pad_0 = const()[name = string("attention_19_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_19_outproj_dilations_0 = const()[name = string("attention_19_outproj_dilations_0"), val = tensor([1])]; int32 attention_19_outproj_groups_0 = const()[name = string("attention_19_outproj_groups_0"), val = int32(1)]; tensor attention_19_outproj = conv(dilations = attention_19_outproj_dilations_0, groups = attention_19_outproj_groups_0, pad = attention_19_outproj_pad_0, pad_type = attention_19_outproj_pad_type_0, strides = attention_19_outproj_strides_0, weight = constexpr_blockwise_shift_scale_91, x = attention_19_reshaped)[name = string("attention_19_outproj")]; tensor block_19_residual_1 = add(x = block_18_residual_2, y = attention_19_outproj)[name = string("block_19_residual_1")]; tensor block_19_ffn_rmsnorm_abs = abs(x = block_19_residual_1)[name = string("block_19_ffn_rmsnorm_abs")]; tensor block_19_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_19_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_19_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_19_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_19_ffn_rmsnorm_maxval = reduce_max(axes = block_19_ffn_rmsnorm_maxval_axes_0, keep_dims = block_19_ffn_rmsnorm_maxval_keep_dims_0, x = block_19_ffn_rmsnorm_abs)[name = string("block_19_ffn_rmsnorm_maxval")]; fp16 block_19_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_19_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_19_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_19_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_19_ffn_rmsnorm_maxval_clipped = clip(alpha = block_19_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_19_ffn_rmsnorm_maxval_clipped_beta_0, x = block_19_ffn_rmsnorm_maxval)[name = string("block_19_ffn_rmsnorm_maxval_clipped")]; tensor block_19_ffn_rmsnorm_scaled = real_div(x = block_19_residual_1, y = block_19_ffn_rmsnorm_maxval_clipped)[name = string("block_19_ffn_rmsnorm_scaled")]; tensor block_19_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_19_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_19_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_19_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_19_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_19_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_19_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_19_ffn_rmsnorm_scaled)[name = string("block_19_ffn_rmsnorm_squared_sum")]; fp16 block_19_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_19_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_19_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_19_ffn_rmsnorm_rsqrt_epsilon_0, x = block_19_ffn_rmsnorm_squared_sum)[name = string("block_19_ffn_rmsnorm_rsqrt")]; fp16 block_19_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_19_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_19_ffn_rmsnorm_dim_scaled = mul(x = block_19_ffn_rmsnorm_scaled, y = block_19_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_19_ffn_rmsnorm_dim_scaled")]; tensor block_19_ffn_rmsnorm_normalized = mul(x = block_19_ffn_rmsnorm_dim_scaled, y = 
block_19_ffn_rmsnorm_rsqrt)[name = string("block_19_ffn_rmsnorm_normalized")]; tensor block_19_ffn_rmsnorm_y_0 = const()[name = string("block_19_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513360192)))]; tensor block_19_ffn_rmsnorm = mul(x = block_19_ffn_rmsnorm_normalized, y = block_19_ffn_rmsnorm_y_0)[name = string("block_19_ffn_rmsnorm")]; tensor block_19_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513362048))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516630720))))[name = string("block_19_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_92 = constexpr_blockwise_shift_scale(data = block_19_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516786432))))[name = string("constexpr_blockwise_shift_scale_92")]; tensor block_19_ffn_inproj_strides_0 = const()[name = string("block_19_ffn_inproj_strides_0"), val = tensor([1])]; string block_19_ffn_inproj_pad_type_0 = const()[name = string("block_19_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_19_ffn_inproj_pad_0 = const()[name = string("block_19_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_19_ffn_inproj_dilations_0 = const()[name = string("block_19_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_19_ffn_inproj_groups_0 = const()[name = string("block_19_ffn_inproj_groups_0"), val = int32(1)]; tensor block_19_ffn_inproj = conv(dilations = block_19_ffn_inproj_dilations_0, groups = block_19_ffn_inproj_groups_0, pad = block_19_ffn_inproj_pad_0, pad_type = block_19_ffn_inproj_pad_type_0, strides = block_19_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_92, x = block_19_ffn_rmsnorm)[name = string("block_19_ffn_inproj")]; tensor block_19_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(516796224))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(520064896))))[name = string("block_19_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_93 = constexpr_blockwise_shift_scale(data = block_19_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(520220608))))[name = string("constexpr_blockwise_shift_scale_93")]; tensor block_19_ffn_g_strides_0 = const()[name = string("block_19_ffn_g_strides_0"), val = tensor([1])]; string block_19_ffn_g_pad_type_0 = const()[name = string("block_19_ffn_g_pad_type_0"), val = string("valid")]; tensor block_19_ffn_g_pad_0 = const()[name = string("block_19_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_19_ffn_g_dilations_0 = const()[name = string("block_19_ffn_g_dilations_0"), val = tensor([1])]; int32 block_19_ffn_g_groups_0 = const()[name = string("block_19_ffn_g_groups_0"), val = int32(1)]; tensor block_19_ffn_g = conv(dilations = block_19_ffn_g_dilations_0, groups = block_19_ffn_g_groups_0, pad = block_19_ffn_g_pad_0, pad_type = block_19_ffn_g_pad_type_0, strides = block_19_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_93, x = block_19_ffn_rmsnorm)[name = string("block_19_ffn_g")]; tensor block_19_ffn_g_activation = silu(x = block_19_ffn_g)[name = string("block_19_ffn_g_activation")]; tensor block_19_ffn_x_gated = mul(x = 
block_19_ffn_inproj, y = block_19_ffn_g_activation)[name = string("block_19_ffn_x_gated")]; tensor block_19_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(520230400))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523499072))))[name = string("block_19_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_94 = constexpr_blockwise_shift_scale(data = block_19_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523527808))))[name = string("constexpr_blockwise_shift_scale_94")]; tensor block_19_ffn_outproj_strides_0 = const()[name = string("block_19_ffn_outproj_strides_0"), val = tensor([1])]; string block_19_ffn_outproj_pad_type_0 = const()[name = string("block_19_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_19_ffn_outproj_pad_0 = const()[name = string("block_19_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_19_ffn_outproj_dilations_0 = const()[name = string("block_19_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_19_ffn_outproj_groups_0 = const()[name = string("block_19_ffn_outproj_groups_0"), val = int32(1)]; tensor block_19_ffn_outproj = conv(dilations = block_19_ffn_outproj_dilations_0, groups = block_19_ffn_outproj_groups_0, pad = block_19_ffn_outproj_pad_0, pad_type = block_19_ffn_outproj_pad_type_0, strides = block_19_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_94, x = block_19_ffn_x_gated)[name = string("block_19_ffn_outproj")]; tensor block_19_residual_2 = add(x = block_19_ffn_outproj, y = block_19_residual_1)[name = string("block_19_residual_2")]; tensor block_20_attention_rmsnorm_abs = abs(x = block_19_residual_2)[name = string("block_20_attention_rmsnorm_abs")]; tensor block_20_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_20_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_20_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_20_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_20_attention_rmsnorm_maxval = reduce_max(axes = block_20_attention_rmsnorm_maxval_axes_0, keep_dims = block_20_attention_rmsnorm_maxval_keep_dims_0, x = block_20_attention_rmsnorm_abs)[name = string("block_20_attention_rmsnorm_maxval")]; fp16 block_20_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_20_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_20_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_20_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_20_attention_rmsnorm_maxval_clipped = clip(alpha = block_20_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_20_attention_rmsnorm_maxval_clipped_beta_0, x = block_20_attention_rmsnorm_maxval)[name = string("block_20_attention_rmsnorm_maxval_clipped")]; tensor block_20_attention_rmsnorm_scaled = real_div(x = block_19_residual_2, y = block_20_attention_rmsnorm_maxval_clipped)[name = string("block_20_attention_rmsnorm_scaled")]; tensor block_20_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_20_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_20_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_20_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_20_attention_rmsnorm_squared_sum = reduce_sum_square(axes 
= block_20_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_20_attention_rmsnorm_squared_sum_keep_dims_0, x = block_20_attention_rmsnorm_scaled)[name = string("block_20_attention_rmsnorm_squared_sum")]; fp16 block_20_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_20_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_20_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_20_attention_rmsnorm_rsqrt_epsilon_0, x = block_20_attention_rmsnorm_squared_sum)[name = string("block_20_attention_rmsnorm_rsqrt")]; fp16 block_20_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_20_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_20_attention_rmsnorm_dim_scaled = mul(x = block_20_attention_rmsnorm_scaled, y = block_20_attention_rmsnorm_dim_scaled_y_0)[name = string("block_20_attention_rmsnorm_dim_scaled")]; tensor block_20_attention_rmsnorm_normalized = mul(x = block_20_attention_rmsnorm_dim_scaled, y = block_20_attention_rmsnorm_rsqrt)[name = string("block_20_attention_rmsnorm_normalized")]; tensor block_20_attention_rmsnorm_y_0 = const()[name = string("block_20_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523529664)))]; tensor block_20_attention_rmsnorm = mul(x = block_20_attention_rmsnorm_normalized, y = block_20_attention_rmsnorm_y_0)[name = string("block_20_attention_rmsnorm")]; tensor attention_20_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523531520))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524305728))))[name = string("attention_20_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_95 = constexpr_blockwise_shift_scale(data = attention_20_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524342656))))[name = string("constexpr_blockwise_shift_scale_95")]; tensor attention_20_qkvproj_bias_0 = const()[name = string("attention_20_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524345024)))]; tensor attention_20_qkvproj_strides_0 = const()[name = string("attention_20_qkvproj_strides_0"), val = tensor([1])]; string attention_20_qkvproj_pad_type_0 = const()[name = string("attention_20_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_20_qkvproj_pad_0 = const()[name = string("attention_20_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_20_qkvproj_dilations_0 = const()[name = string("attention_20_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_20_qkvproj_groups_0 = const()[name = string("attention_20_qkvproj_groups_0"), val = int32(1)]; tensor attention_20_qkvproj = conv(bias = attention_20_qkvproj_bias_0, dilations = attention_20_qkvproj_dilations_0, groups = attention_20_qkvproj_groups_0, pad = attention_20_qkvproj_pad_0, pad_type = attention_20_qkvproj_pad_type_0, strides = attention_20_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_95, x = block_20_attention_rmsnorm)[name = string("attention_20_qkvproj")]; tensor attention_20_head_reshape_shape_0 = const()[name = string("attention_20_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_20_head_reshape = reshape(shape = attention_20_head_reshape_shape_0, x = attention_20_qkvproj)[name = string("attention_20_head_reshape")]; tensor 
attention_20_head_transpose_perm_0 = const()[name = string("attention_20_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_20_split_qkv_heads_axis_0 = const()[name = string("attention_20_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_20_split_qkv_heads_split_sizes_0 = const()[name = string("attention_20_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_20_head_transpose = transpose(perm = attention_20_head_transpose_perm_0, x = attention_20_head_reshape)[name = string("transpose_8")]; tensor attention_20_split_qkv_heads_0, tensor attention_20_split_qkv_heads_1, tensor attention_20_split_qkv_heads_2 = split(axis = attention_20_split_qkv_heads_axis_0, split_sizes = attention_20_split_qkv_heads_split_sizes_0, x = attention_20_head_transpose)[name = string("attention_20_split_qkv_heads")]; tensor attention_20_q_rope_lhs_mult = mul(x = attention_20_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_20_q_rope_lhs_mult")]; int32 attention_20_q_rotate_half_split_num_splits_0 = const()[name = string("attention_20_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_20_q_rotate_half_split_axis_0 = const()[name = string("attention_20_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_20_q_rotate_half_split_0, tensor attention_20_q_rotate_half_split_1 = split(axis = attention_20_q_rotate_half_split_axis_0, num_splits = attention_20_q_rotate_half_split_num_splits_0, x = attention_20_split_qkv_heads_0)[name = string("attention_20_q_rotate_half_split")]; fp16 attention_20_q_rotate_half_neg_y_0 = const()[name = string("attention_20_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_20_q_rotate_half_neg = mul(x = attention_20_q_rotate_half_split_1, y = attention_20_q_rotate_half_neg_y_0)[name = string("attention_20_q_rotate_half_neg")]; int32 attention_20_q_rotate_half_concat_axis_0 = const()[name = string("attention_20_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_20_q_rotate_half_concat_interleave_0 = const()[name = string("attention_20_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_20_q_rotate_half_concat = concat(axis = attention_20_q_rotate_half_concat_axis_0, interleave = attention_20_q_rotate_half_concat_interleave_0, values = (attention_20_q_rotate_half_neg, attention_20_q_rotate_half_split_0))[name = string("attention_20_q_rotate_half_concat")]; tensor attention_20_q_rope_rhs_mult = mul(x = attention_20_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_20_q_rope_rhs_mult")]; tensor attention_20_q_rope = add(x = attention_20_q_rope_lhs_mult, y = attention_20_q_rope_rhs_mult)[name = string("attention_20_q_rope")]; tensor attention_20_k_rope_lhs_mult = mul(x = attention_20_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_20_k_rope_lhs_mult")]; int32 attention_20_k_rotate_half_split_num_splits_0 = const()[name = string("attention_20_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_20_k_rotate_half_split_axis_0 = const()[name = string("attention_20_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_20_k_rotate_half_split_0, tensor attention_20_k_rotate_half_split_1 = split(axis = attention_20_k_rotate_half_split_axis_0, num_splits = attention_20_k_rotate_half_split_num_splits_0, x = attention_20_split_qkv_heads_1)[name = string("attention_20_k_rotate_half_split")]; fp16 attention_20_k_rotate_half_neg_y_0 = const()[name = string("attention_20_k_rotate_half_neg_y_0"), val = 
fp16(-0x1p+0)]; tensor attention_20_k_rotate_half_neg = mul(x = attention_20_k_rotate_half_split_1, y = attention_20_k_rotate_half_neg_y_0)[name = string("attention_20_k_rotate_half_neg")]; int32 attention_20_k_rotate_half_concat_axis_0 = const()[name = string("attention_20_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_20_k_rotate_half_concat_interleave_0 = const()[name = string("attention_20_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_20_k_rotate_half_concat = concat(axis = attention_20_k_rotate_half_concat_axis_0, interleave = attention_20_k_rotate_half_concat_interleave_0, values = (attention_20_k_rotate_half_neg, attention_20_k_rotate_half_split_0))[name = string("attention_20_k_rotate_half_concat")]; tensor attention_20_k_rope_rhs_mult = mul(x = attention_20_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_20_k_rope_rhs_mult")]; tensor attention_20_k_rope = add(x = attention_20_k_rope_lhs_mult, y = attention_20_k_rope_rhs_mult)[name = string("attention_20_k_rope")]; int32 attention_20_q_splits_axis_0 = const()[name = string("attention_20_q_splits_axis_0"), val = int32(1)]; int32 attention_20_q_splits_num_splits_0 = const()[name = string("attention_20_q_splits_num_splits_0"), val = int32(2)]; tensor attention_20_q_splits_0, tensor attention_20_q_splits_1 = split(axis = attention_20_q_splits_axis_0, num_splits = attention_20_q_splits_num_splits_0, x = attention_20_q_rope)[name = string("attention_20_q_splits")]; tensor attention_20_update_begin_0_values0_0 = const()[name = string("attention_20_update_begin_0_values0_0"), val = tensor([20])]; tensor attention_20_update_begin_0_values1_0 = const()[name = string("attention_20_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_20_update_begin_0_values3_0 = const()[name = string("attention_20_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_20_update_begin_0_axis_0 = const()[name = string("attention_20_update_begin_0_axis_0"), val = int32(0)]; bool attention_20_update_begin_0_interleave_0 = const()[name = string("attention_20_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_20_update_begin_0 = concat(axis = attention_20_update_begin_0_axis_0, interleave = attention_20_update_begin_0_interleave_0, values = (attention_20_update_begin_0_values0_0, attention_20_update_begin_0_values1_0, query_pos1, attention_20_update_begin_0_values3_0))[name = string("attention_20_update_begin_0")]; tensor attention_20_update_end_0_values0_0 = const()[name = string("attention_20_update_end_0_values0_0"), val = tensor([21])]; tensor attention_20_update_end_0_values1_0 = const()[name = string("attention_20_update_end_0_values1_0"), val = tensor([2])]; tensor attention_20_update_end_0_values3_0 = const()[name = string("attention_20_update_end_0_values3_0"), val = tensor([64])]; int32 attention_20_update_end_0_axis_0 = const()[name = string("attention_20_update_end_0_axis_0"), val = int32(0)]; bool attention_20_update_end_0_interleave_0 = const()[name = string("attention_20_update_end_0_interleave_0"), val = bool(false)]; tensor attention_20_update_end_0 = concat(axis = attention_20_update_end_0_axis_0, interleave = attention_20_update_end_0_interleave_0, values = (attention_20_update_end_0_values0_0, attention_20_update_end_0_values1_0, end_pos_0, attention_20_update_end_0_values3_0))[name = string("attention_20_update_end_0")]; tensor attention_20_updated_key_cache_0_squeeze_mask_0 = const()[name = 
string("attention_20_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_20_updated_key_cache_0 = slice_update(begin = attention_20_update_begin_0, end = attention_20_update_end_0, squeeze_mask = attention_20_updated_key_cache_0_squeeze_mask_0, update = attention_20_k_rope, x = coreml_update_state_38)[name = string("attention_20_updated_key_cache_0")]; write_state(data = attention_20_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_88_write_state")]; tensor coreml_update_state_40 = read_state(input = key_cache_state)[name = string("coreml_update_state_88")]; tensor attention_20_key_cache_begin_0 = const()[name = string("attention_20_key_cache_begin_0"), val = tensor([20, 0, 0, 0])]; tensor attention_20_key_cache_end_0 = const()[name = string("attention_20_key_cache_end_0"), val = tensor([21, 2, 512, 64])]; tensor attention_20_key_cache_squeeze_mask_0 = const()[name = string("attention_20_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_20_key_cache = slice_by_index(begin = attention_20_key_cache_begin_0, end = attention_20_key_cache_end_0, squeeze_mask = attention_20_key_cache_squeeze_mask_0, x = coreml_update_state_40)[name = string("attention_20_key_cache")]; int32 attention_20_key_cache_head_axis_0 = const()[name = string("attention_20_key_cache_head_axis_0"), val = int32(1)]; int32 attention_20_key_cache_head_num_splits_0 = const()[name = string("attention_20_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_20_key_cache_head_0, tensor attention_20_key_cache_head_1 = split(axis = attention_20_key_cache_head_axis_0, num_splits = attention_20_key_cache_head_num_splits_0, x = attention_20_key_cache)[name = string("attention_20_key_cache_head")]; tensor attention_20_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_20_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_20_updated_value_cache_0 = slice_update(begin = attention_20_update_begin_0, end = attention_20_update_end_0, squeeze_mask = attention_20_updated_value_cache_0_squeeze_mask_0, update = attention_20_split_qkv_heads_2, x = coreml_update_state_39)[name = string("attention_20_updated_value_cache_0")]; write_state(data = attention_20_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_89_write_state")]; tensor coreml_update_state_41 = read_state(input = value_cache_state)[name = string("coreml_update_state_89")]; tensor attention_20_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_20_slice_current_layer_value_cache_begin_0"), val = tensor([20, 0, 0, 0])]; tensor attention_20_slice_current_layer_value_cache_end_0 = const()[name = string("attention_20_slice_current_layer_value_cache_end_0"), val = tensor([21, 2, 512, 64])]; tensor attention_20_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_20_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_20_slice_current_layer_value_cache = slice_by_index(begin = attention_20_slice_current_layer_value_cache_begin_0, end = attention_20_slice_current_layer_value_cache_end_0, squeeze_mask = attention_20_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_41)[name = string("attention_20_slice_current_layer_value_cache")]; int32 attention_20_slice_value_cache_heads_axis_0 = const()[name = 
string("attention_20_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_20_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_20_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_20_slice_value_cache_heads_0, tensor attention_20_slice_value_cache_heads_1 = split(axis = attention_20_slice_value_cache_heads_axis_0, num_splits = attention_20_slice_value_cache_heads_num_splits_0, x = attention_20_slice_current_layer_value_cache)[name = string("attention_20_slice_value_cache_heads")]; bool attention_20_scores_0_transpose_y_0 = const()[name = string("attention_20_scores_0_transpose_y_0"), val = bool(true)]; bool attention_20_scores_0_transpose_x_0 = const()[name = string("attention_20_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_20_scores_0 = matmul(transpose_x = attention_20_scores_0_transpose_x_0, transpose_y = attention_20_scores_0_transpose_y_0, x = attention_20_key_cache_head_0, y = attention_20_q_splits_0)[name = string("attention_20_scores_0")]; fp16 attention_20_scaled_scores_0_y_0 = const()[name = string("attention_20_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_20_scaled_scores_0 = mul(x = attention_20_scores_0, y = attention_20_scaled_scores_0_y_0)[name = string("attention_20_scaled_scores_0")]; tensor attention_20_masked_scaled_scores_0 = add(x = attention_20_scaled_scores_0, y = transpose_0)[name = string("attention_20_masked_scaled_scores_0")]; int32 softmax_40_axis_0 = const()[name = string("softmax_40_axis_0"), val = int32(-2)]; tensor softmax_40 = softmax(axis = softmax_40_axis_0, x = attention_20_masked_scaled_scores_0)[name = string("softmax_40")]; bool attention_20_attention_0_transpose_x_0 = const()[name = string("attention_20_attention_0_transpose_x_0"), val = bool(true)]; bool attention_20_attention_0_transpose_y_0 = const()[name = string("attention_20_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_20_attention_0 = matmul(transpose_x = attention_20_attention_0_transpose_x_0, transpose_y = attention_20_attention_0_transpose_y_0, x = softmax_40, y = attention_20_slice_value_cache_heads_0)[name = string("attention_20_attention_0")]; bool attention_20_scores_1_transpose_y_0 = const()[name = string("attention_20_scores_1_transpose_y_0"), val = bool(true)]; bool attention_20_scores_1_transpose_x_0 = const()[name = string("attention_20_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_20_scores_1 = matmul(transpose_x = attention_20_scores_1_transpose_x_0, transpose_y = attention_20_scores_1_transpose_y_0, x = attention_20_key_cache_head_1, y = attention_20_q_splits_1)[name = string("attention_20_scores_1")]; fp16 attention_20_scaled_scores_1_y_0 = const()[name = string("attention_20_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_20_scaled_scores_1 = mul(x = attention_20_scores_1, y = attention_20_scaled_scores_1_y_0)[name = string("attention_20_scaled_scores_1")]; tensor attention_20_masked_scaled_scores_1 = add(x = attention_20_scaled_scores_1, y = transpose_0)[name = string("attention_20_masked_scaled_scores_1")]; int32 softmax_41_axis_0 = const()[name = string("softmax_41_axis_0"), val = int32(-2)]; tensor softmax_41 = softmax(axis = softmax_41_axis_0, x = attention_20_masked_scaled_scores_1)[name = string("softmax_41")]; bool attention_20_attention_1_transpose_x_0 = const()[name = string("attention_20_attention_1_transpose_x_0"), val = bool(true)]; bool attention_20_attention_1_transpose_y_0 = const()[name = 
string("attention_20_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_20_attention_1 = matmul(transpose_x = attention_20_attention_1_transpose_x_0, transpose_y = attention_20_attention_1_transpose_y_0, x = softmax_41, y = attention_20_slice_value_cache_heads_1)[name = string("attention_20_attention_1")]; int32 attention_20_concat_attention_all_heads_axis_0 = const()[name = string("attention_20_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_20_concat_attention_all_heads_interleave_0 = const()[name = string("attention_20_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_20_concat_attention_all_heads = concat(axis = attention_20_concat_attention_all_heads_axis_0, interleave = attention_20_concat_attention_all_heads_interleave_0, values = (attention_20_attention_0, attention_20_attention_1))[name = string("attention_20_concat_attention_all_heads")]; tensor attention_20_channels_first_retransposed_perm_0 = const()[name = string("attention_20_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_20_reshaped_shape_0 = const()[name = string("attention_20_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_20_channels_first_retransposed = transpose(perm = attention_20_channels_first_retransposed_perm_0, x = attention_20_concat_attention_all_heads)[name = string("transpose_7")]; tensor attention_20_reshaped = reshape(shape = attention_20_reshaped_shape_0, x = attention_20_channels_first_retransposed)[name = string("attention_20_reshaped")]; tensor attention_20_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524347392))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524949568))))[name = string("attention_20_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_96 = constexpr_blockwise_shift_scale(data = attention_20_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524978304))))[name = string("constexpr_blockwise_shift_scale_96")]; tensor attention_20_outproj_strides_0 = const()[name = string("attention_20_outproj_strides_0"), val = tensor([1])]; string attention_20_outproj_pad_type_0 = const()[name = string("attention_20_outproj_pad_type_0"), val = string("valid")]; tensor attention_20_outproj_pad_0 = const()[name = string("attention_20_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_20_outproj_dilations_0 = const()[name = string("attention_20_outproj_dilations_0"), val = tensor([1])]; int32 attention_20_outproj_groups_0 = const()[name = string("attention_20_outproj_groups_0"), val = int32(1)]; tensor attention_20_outproj = conv(dilations = attention_20_outproj_dilations_0, groups = attention_20_outproj_groups_0, pad = attention_20_outproj_pad_0, pad_type = attention_20_outproj_pad_type_0, strides = attention_20_outproj_strides_0, weight = constexpr_blockwise_shift_scale_96, x = attention_20_reshaped)[name = string("attention_20_outproj")]; tensor block_20_residual_1 = add(x = block_19_residual_2, y = attention_20_outproj)[name = string("block_20_residual_1")]; tensor block_20_ffn_rmsnorm_abs = abs(x = block_20_residual_1)[name = string("block_20_ffn_rmsnorm_abs")]; tensor block_20_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_20_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_20_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = 
string("block_20_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_20_ffn_rmsnorm_maxval = reduce_max(axes = block_20_ffn_rmsnorm_maxval_axes_0, keep_dims = block_20_ffn_rmsnorm_maxval_keep_dims_0, x = block_20_ffn_rmsnorm_abs)[name = string("block_20_ffn_rmsnorm_maxval")]; fp16 block_20_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_20_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_20_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_20_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_20_ffn_rmsnorm_maxval_clipped = clip(alpha = block_20_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_20_ffn_rmsnorm_maxval_clipped_beta_0, x = block_20_ffn_rmsnorm_maxval)[name = string("block_20_ffn_rmsnorm_maxval_clipped")]; tensor block_20_ffn_rmsnorm_scaled = real_div(x = block_20_residual_1, y = block_20_ffn_rmsnorm_maxval_clipped)[name = string("block_20_ffn_rmsnorm_scaled")]; tensor block_20_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_20_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_20_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_20_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_20_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_20_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_20_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_20_ffn_rmsnorm_scaled)[name = string("block_20_ffn_rmsnorm_squared_sum")]; fp16 block_20_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_20_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_20_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_20_ffn_rmsnorm_rsqrt_epsilon_0, x = block_20_ffn_rmsnorm_squared_sum)[name = string("block_20_ffn_rmsnorm_rsqrt")]; fp16 block_20_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_20_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_20_ffn_rmsnorm_dim_scaled = mul(x = block_20_ffn_rmsnorm_scaled, y = block_20_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_20_ffn_rmsnorm_dim_scaled")]; tensor block_20_ffn_rmsnorm_normalized = mul(x = block_20_ffn_rmsnorm_dim_scaled, y = block_20_ffn_rmsnorm_rsqrt)[name = string("block_20_ffn_rmsnorm_normalized")]; tensor block_20_ffn_rmsnorm_y_0 = const()[name = string("block_20_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524980160)))]; tensor block_20_ffn_rmsnorm = mul(x = block_20_ffn_rmsnorm_normalized, y = block_20_ffn_rmsnorm_y_0)[name = string("block_20_ffn_rmsnorm")]; tensor block_20_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524982016))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528250688))))[name = string("block_20_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_97 = constexpr_blockwise_shift_scale(data = block_20_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528406400))))[name = string("constexpr_blockwise_shift_scale_97")]; tensor block_20_ffn_inproj_strides_0 = const()[name = string("block_20_ffn_inproj_strides_0"), val = tensor([1])]; string block_20_ffn_inproj_pad_type_0 = const()[name = string("block_20_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_20_ffn_inproj_pad_0 = const()[name = string("block_20_ffn_inproj_pad_0"), val = 
tensor([0, 0])]; tensor block_20_ffn_inproj_dilations_0 = const()[name = string("block_20_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_20_ffn_inproj_groups_0 = const()[name = string("block_20_ffn_inproj_groups_0"), val = int32(1)]; tensor block_20_ffn_inproj = conv(dilations = block_20_ffn_inproj_dilations_0, groups = block_20_ffn_inproj_groups_0, pad = block_20_ffn_inproj_pad_0, pad_type = block_20_ffn_inproj_pad_type_0, strides = block_20_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_97, x = block_20_ffn_rmsnorm)[name = string("block_20_ffn_inproj")]; tensor block_20_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(528416192))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531684864))))[name = string("block_20_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_98 = constexpr_blockwise_shift_scale(data = block_20_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531840576))))[name = string("constexpr_blockwise_shift_scale_98")]; tensor block_20_ffn_g_strides_0 = const()[name = string("block_20_ffn_g_strides_0"), val = tensor([1])]; string block_20_ffn_g_pad_type_0 = const()[name = string("block_20_ffn_g_pad_type_0"), val = string("valid")]; tensor block_20_ffn_g_pad_0 = const()[name = string("block_20_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_20_ffn_g_dilations_0 = const()[name = string("block_20_ffn_g_dilations_0"), val = tensor([1])]; int32 block_20_ffn_g_groups_0 = const()[name = string("block_20_ffn_g_groups_0"), val = int32(1)]; tensor block_20_ffn_g = conv(dilations = block_20_ffn_g_dilations_0, groups = block_20_ffn_g_groups_0, pad = block_20_ffn_g_pad_0, pad_type = block_20_ffn_g_pad_type_0, strides = block_20_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_98, x = block_20_ffn_rmsnorm)[name = string("block_20_ffn_g")]; tensor block_20_ffn_g_activation = silu(x = block_20_ffn_g)[name = string("block_20_ffn_g_activation")]; tensor block_20_ffn_x_gated = mul(x = block_20_ffn_inproj, y = block_20_ffn_g_activation)[name = string("block_20_ffn_x_gated")]; tensor block_20_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531850368))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535119040))))[name = string("block_20_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_99 = constexpr_blockwise_shift_scale(data = block_20_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535147776))))[name = string("constexpr_blockwise_shift_scale_99")]; tensor block_20_ffn_outproj_strides_0 = const()[name = string("block_20_ffn_outproj_strides_0"), val = tensor([1])]; string block_20_ffn_outproj_pad_type_0 = const()[name = string("block_20_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_20_ffn_outproj_pad_0 = const()[name = string("block_20_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_20_ffn_outproj_dilations_0 = const()[name = string("block_20_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_20_ffn_outproj_groups_0 = const()[name = string("block_20_ffn_outproj_groups_0"), val = int32(1)]; tensor block_20_ffn_outproj = conv(dilations = 
block_20_ffn_outproj_dilations_0, groups = block_20_ffn_outproj_groups_0, pad = block_20_ffn_outproj_pad_0, pad_type = block_20_ffn_outproj_pad_type_0, strides = block_20_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_99, x = block_20_ffn_x_gated)[name = string("block_20_ffn_outproj")]; tensor block_20_residual_2 = add(x = block_20_ffn_outproj, y = block_20_residual_1)[name = string("block_20_residual_2")]; tensor block_21_attention_rmsnorm_abs = abs(x = block_20_residual_2)[name = string("block_21_attention_rmsnorm_abs")]; tensor block_21_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_21_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_21_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_21_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_21_attention_rmsnorm_maxval = reduce_max(axes = block_21_attention_rmsnorm_maxval_axes_0, keep_dims = block_21_attention_rmsnorm_maxval_keep_dims_0, x = block_21_attention_rmsnorm_abs)[name = string("block_21_attention_rmsnorm_maxval")]; fp16 block_21_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_21_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_21_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_21_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_21_attention_rmsnorm_maxval_clipped = clip(alpha = block_21_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_21_attention_rmsnorm_maxval_clipped_beta_0, x = block_21_attention_rmsnorm_maxval)[name = string("block_21_attention_rmsnorm_maxval_clipped")]; tensor block_21_attention_rmsnorm_scaled = real_div(x = block_20_residual_2, y = block_21_attention_rmsnorm_maxval_clipped)[name = string("block_21_attention_rmsnorm_scaled")]; tensor block_21_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_21_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_21_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_21_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_21_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_21_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_21_attention_rmsnorm_squared_sum_keep_dims_0, x = block_21_attention_rmsnorm_scaled)[name = string("block_21_attention_rmsnorm_squared_sum")]; fp16 block_21_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_21_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_21_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_21_attention_rmsnorm_rsqrt_epsilon_0, x = block_21_attention_rmsnorm_squared_sum)[name = string("block_21_attention_rmsnorm_rsqrt")]; fp16 block_21_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_21_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_21_attention_rmsnorm_dim_scaled = mul(x = block_21_attention_rmsnorm_scaled, y = block_21_attention_rmsnorm_dim_scaled_y_0)[name = string("block_21_attention_rmsnorm_dim_scaled")]; tensor block_21_attention_rmsnorm_normalized = mul(x = block_21_attention_rmsnorm_dim_scaled, y = block_21_attention_rmsnorm_rsqrt)[name = string("block_21_attention_rmsnorm_normalized")]; tensor block_21_attention_rmsnorm_y_0 = const()[name = string("block_21_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535149632)))]; tensor block_21_attention_rmsnorm = mul(x = 
block_21_attention_rmsnorm_normalized, y = block_21_attention_rmsnorm_y_0)[name = string("block_21_attention_rmsnorm")]; tensor attention_21_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535151488))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535925696))))[name = string("attention_21_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_100 = constexpr_blockwise_shift_scale(data = attention_21_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535962624))))[name = string("constexpr_blockwise_shift_scale_100")]; tensor attention_21_qkvproj_bias_0 = const()[name = string("attention_21_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535964992)))]; tensor attention_21_qkvproj_strides_0 = const()[name = string("attention_21_qkvproj_strides_0"), val = tensor([1])]; string attention_21_qkvproj_pad_type_0 = const()[name = string("attention_21_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_21_qkvproj_pad_0 = const()[name = string("attention_21_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_21_qkvproj_dilations_0 = const()[name = string("attention_21_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_21_qkvproj_groups_0 = const()[name = string("attention_21_qkvproj_groups_0"), val = int32(1)]; tensor attention_21_qkvproj = conv(bias = attention_21_qkvproj_bias_0, dilations = attention_21_qkvproj_dilations_0, groups = attention_21_qkvproj_groups_0, pad = attention_21_qkvproj_pad_0, pad_type = attention_21_qkvproj_pad_type_0, strides = attention_21_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_100, x = block_21_attention_rmsnorm)[name = string("attention_21_qkvproj")]; tensor attention_21_head_reshape_shape_0 = const()[name = string("attention_21_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_21_head_reshape = reshape(shape = attention_21_head_reshape_shape_0, x = attention_21_qkvproj)[name = string("attention_21_head_reshape")]; tensor attention_21_head_transpose_perm_0 = const()[name = string("attention_21_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_21_split_qkv_heads_axis_0 = const()[name = string("attention_21_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_21_split_qkv_heads_split_sizes_0 = const()[name = string("attention_21_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_21_head_transpose = transpose(perm = attention_21_head_transpose_perm_0, x = attention_21_head_reshape)[name = string("transpose_6")]; tensor attention_21_split_qkv_heads_0, tensor attention_21_split_qkv_heads_1, tensor attention_21_split_qkv_heads_2 = split(axis = attention_21_split_qkv_heads_axis_0, split_sizes = attention_21_split_qkv_heads_split_sizes_0, x = attention_21_head_transpose)[name = string("attention_21_split_qkv_heads")]; tensor attention_21_q_rope_lhs_mult = mul(x = attention_21_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_21_q_rope_lhs_mult")]; int32 attention_21_q_rotate_half_split_num_splits_0 = const()[name = string("attention_21_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_21_q_rotate_half_split_axis_0 = const()[name = string("attention_21_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_21_q_rotate_half_split_0, tensor 
attention_21_q_rotate_half_split_1 = split(axis = attention_21_q_rotate_half_split_axis_0, num_splits = attention_21_q_rotate_half_split_num_splits_0, x = attention_21_split_qkv_heads_0)[name = string("attention_21_q_rotate_half_split")]; fp16 attention_21_q_rotate_half_neg_y_0 = const()[name = string("attention_21_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_21_q_rotate_half_neg = mul(x = attention_21_q_rotate_half_split_1, y = attention_21_q_rotate_half_neg_y_0)[name = string("attention_21_q_rotate_half_neg")]; int32 attention_21_q_rotate_half_concat_axis_0 = const()[name = string("attention_21_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_21_q_rotate_half_concat_interleave_0 = const()[name = string("attention_21_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_21_q_rotate_half_concat = concat(axis = attention_21_q_rotate_half_concat_axis_0, interleave = attention_21_q_rotate_half_concat_interleave_0, values = (attention_21_q_rotate_half_neg, attention_21_q_rotate_half_split_0))[name = string("attention_21_q_rotate_half_concat")]; tensor attention_21_q_rope_rhs_mult = mul(x = attention_21_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_21_q_rope_rhs_mult")]; tensor attention_21_q_rope = add(x = attention_21_q_rope_lhs_mult, y = attention_21_q_rope_rhs_mult)[name = string("attention_21_q_rope")]; tensor attention_21_k_rope_lhs_mult = mul(x = attention_21_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_21_k_rope_lhs_mult")]; int32 attention_21_k_rotate_half_split_num_splits_0 = const()[name = string("attention_21_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_21_k_rotate_half_split_axis_0 = const()[name = string("attention_21_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_21_k_rotate_half_split_0, tensor attention_21_k_rotate_half_split_1 = split(axis = attention_21_k_rotate_half_split_axis_0, num_splits = attention_21_k_rotate_half_split_num_splits_0, x = attention_21_split_qkv_heads_1)[name = string("attention_21_k_rotate_half_split")]; fp16 attention_21_k_rotate_half_neg_y_0 = const()[name = string("attention_21_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_21_k_rotate_half_neg = mul(x = attention_21_k_rotate_half_split_1, y = attention_21_k_rotate_half_neg_y_0)[name = string("attention_21_k_rotate_half_neg")]; int32 attention_21_k_rotate_half_concat_axis_0 = const()[name = string("attention_21_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_21_k_rotate_half_concat_interleave_0 = const()[name = string("attention_21_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_21_k_rotate_half_concat = concat(axis = attention_21_k_rotate_half_concat_axis_0, interleave = attention_21_k_rotate_half_concat_interleave_0, values = (attention_21_k_rotate_half_neg, attention_21_k_rotate_half_split_0))[name = string("attention_21_k_rotate_half_concat")]; tensor attention_21_k_rope_rhs_mult = mul(x = attention_21_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_21_k_rope_rhs_mult")]; tensor attention_21_k_rope = add(x = attention_21_k_rope_lhs_mult, y = attention_21_k_rope_rhs_mult)[name = string("attention_21_k_rope")]; int32 attention_21_q_splits_axis_0 = const()[name = string("attention_21_q_splits_axis_0"), val = int32(1)]; int32 attention_21_q_splits_num_splits_0 = const()[name = string("attention_21_q_splits_num_splits_0"), val = int32(2)]; tensor attention_21_q_splits_0, tensor 
attention_21_q_splits_1 = split(axis = attention_21_q_splits_axis_0, num_splits = attention_21_q_splits_num_splits_0, x = attention_21_q_rope)[name = string("attention_21_q_splits")]; tensor attention_21_update_begin_0_values0_0 = const()[name = string("attention_21_update_begin_0_values0_0"), val = tensor([21])]; tensor attention_21_update_begin_0_values1_0 = const()[name = string("attention_21_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_21_update_begin_0_values3_0 = const()[name = string("attention_21_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_21_update_begin_0_axis_0 = const()[name = string("attention_21_update_begin_0_axis_0"), val = int32(0)]; bool attention_21_update_begin_0_interleave_0 = const()[name = string("attention_21_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_21_update_begin_0 = concat(axis = attention_21_update_begin_0_axis_0, interleave = attention_21_update_begin_0_interleave_0, values = (attention_21_update_begin_0_values0_0, attention_21_update_begin_0_values1_0, query_pos1, attention_21_update_begin_0_values3_0))[name = string("attention_21_update_begin_0")]; tensor attention_21_update_end_0_values0_0 = const()[name = string("attention_21_update_end_0_values0_0"), val = tensor([22])]; tensor attention_21_update_end_0_values1_0 = const()[name = string("attention_21_update_end_0_values1_0"), val = tensor([2])]; tensor attention_21_update_end_0_values3_0 = const()[name = string("attention_21_update_end_0_values3_0"), val = tensor([64])]; int32 attention_21_update_end_0_axis_0 = const()[name = string("attention_21_update_end_0_axis_0"), val = int32(0)]; bool attention_21_update_end_0_interleave_0 = const()[name = string("attention_21_update_end_0_interleave_0"), val = bool(false)]; tensor attention_21_update_end_0 = concat(axis = attention_21_update_end_0_axis_0, interleave = attention_21_update_end_0_interleave_0, values = (attention_21_update_end_0_values0_0, attention_21_update_end_0_values1_0, end_pos_0, attention_21_update_end_0_values3_0))[name = string("attention_21_update_end_0")]; tensor attention_21_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_21_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_21_updated_key_cache_0 = slice_update(begin = attention_21_update_begin_0, end = attention_21_update_end_0, squeeze_mask = attention_21_updated_key_cache_0_squeeze_mask_0, update = attention_21_k_rope, x = coreml_update_state_40)[name = string("attention_21_updated_key_cache_0")]; write_state(data = attention_21_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_90_write_state")]; tensor coreml_update_state_42 = read_state(input = key_cache_state)[name = string("coreml_update_state_90")]; tensor attention_21_key_cache_begin_0 = const()[name = string("attention_21_key_cache_begin_0"), val = tensor([21, 0, 0, 0])]; tensor attention_21_key_cache_end_0 = const()[name = string("attention_21_key_cache_end_0"), val = tensor([22, 2, 512, 64])]; tensor attention_21_key_cache_squeeze_mask_0 = const()[name = string("attention_21_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_21_key_cache = slice_by_index(begin = attention_21_key_cache_begin_0, end = attention_21_key_cache_end_0, squeeze_mask = attention_21_key_cache_squeeze_mask_0, x = coreml_update_state_42)[name = string("attention_21_key_cache")]; int32 attention_21_key_cache_head_axis_0 = const()[name = 
string("attention_21_key_cache_head_axis_0"), val = int32(1)]; int32 attention_21_key_cache_head_num_splits_0 = const()[name = string("attention_21_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_21_key_cache_head_0, tensor attention_21_key_cache_head_1 = split(axis = attention_21_key_cache_head_axis_0, num_splits = attention_21_key_cache_head_num_splits_0, x = attention_21_key_cache)[name = string("attention_21_key_cache_head")]; tensor attention_21_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_21_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_21_updated_value_cache_0 = slice_update(begin = attention_21_update_begin_0, end = attention_21_update_end_0, squeeze_mask = attention_21_updated_value_cache_0_squeeze_mask_0, update = attention_21_split_qkv_heads_2, x = coreml_update_state_41)[name = string("attention_21_updated_value_cache_0")]; write_state(data = attention_21_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_91_write_state")]; tensor coreml_update_state_43 = read_state(input = value_cache_state)[name = string("coreml_update_state_91")]; tensor attention_21_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_21_slice_current_layer_value_cache_begin_0"), val = tensor([21, 0, 0, 0])]; tensor attention_21_slice_current_layer_value_cache_end_0 = const()[name = string("attention_21_slice_current_layer_value_cache_end_0"), val = tensor([22, 2, 512, 64])]; tensor attention_21_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_21_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_21_slice_current_layer_value_cache = slice_by_index(begin = attention_21_slice_current_layer_value_cache_begin_0, end = attention_21_slice_current_layer_value_cache_end_0, squeeze_mask = attention_21_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_43)[name = string("attention_21_slice_current_layer_value_cache")]; int32 attention_21_slice_value_cache_heads_axis_0 = const()[name = string("attention_21_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_21_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_21_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_21_slice_value_cache_heads_0, tensor attention_21_slice_value_cache_heads_1 = split(axis = attention_21_slice_value_cache_heads_axis_0, num_splits = attention_21_slice_value_cache_heads_num_splits_0, x = attention_21_slice_current_layer_value_cache)[name = string("attention_21_slice_value_cache_heads")]; bool attention_21_scores_0_transpose_y_0 = const()[name = string("attention_21_scores_0_transpose_y_0"), val = bool(true)]; bool attention_21_scores_0_transpose_x_0 = const()[name = string("attention_21_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_21_scores_0 = matmul(transpose_x = attention_21_scores_0_transpose_x_0, transpose_y = attention_21_scores_0_transpose_y_0, x = attention_21_key_cache_head_0, y = attention_21_q_splits_0)[name = string("attention_21_scores_0")]; fp16 attention_21_scaled_scores_0_y_0 = const()[name = string("attention_21_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_21_scaled_scores_0 = mul(x = attention_21_scores_0, y = attention_21_scaled_scores_0_y_0)[name = string("attention_21_scaled_scores_0")]; tensor attention_21_masked_scaled_scores_0 = add(x = 
attention_21_scaled_scores_0, y = transpose_0)[name = string("attention_21_masked_scaled_scores_0")]; int32 softmax_42_axis_0 = const()[name = string("softmax_42_axis_0"), val = int32(-2)]; tensor softmax_42 = softmax(axis = softmax_42_axis_0, x = attention_21_masked_scaled_scores_0)[name = string("softmax_42")]; bool attention_21_attention_0_transpose_x_0 = const()[name = string("attention_21_attention_0_transpose_x_0"), val = bool(true)]; bool attention_21_attention_0_transpose_y_0 = const()[name = string("attention_21_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_21_attention_0 = matmul(transpose_x = attention_21_attention_0_transpose_x_0, transpose_y = attention_21_attention_0_transpose_y_0, x = softmax_42, y = attention_21_slice_value_cache_heads_0)[name = string("attention_21_attention_0")]; bool attention_21_scores_1_transpose_y_0 = const()[name = string("attention_21_scores_1_transpose_y_0"), val = bool(true)]; bool attention_21_scores_1_transpose_x_0 = const()[name = string("attention_21_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_21_scores_1 = matmul(transpose_x = attention_21_scores_1_transpose_x_0, transpose_y = attention_21_scores_1_transpose_y_0, x = attention_21_key_cache_head_1, y = attention_21_q_splits_1)[name = string("attention_21_scores_1")]; fp16 attention_21_scaled_scores_1_y_0 = const()[name = string("attention_21_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_21_scaled_scores_1 = mul(x = attention_21_scores_1, y = attention_21_scaled_scores_1_y_0)[name = string("attention_21_scaled_scores_1")]; tensor attention_21_masked_scaled_scores_1 = add(x = attention_21_scaled_scores_1, y = transpose_0)[name = string("attention_21_masked_scaled_scores_1")]; int32 softmax_43_axis_0 = const()[name = string("softmax_43_axis_0"), val = int32(-2)]; tensor softmax_43 = softmax(axis = softmax_43_axis_0, x = attention_21_masked_scaled_scores_1)[name = string("softmax_43")]; bool attention_21_attention_1_transpose_x_0 = const()[name = string("attention_21_attention_1_transpose_x_0"), val = bool(true)]; bool attention_21_attention_1_transpose_y_0 = const()[name = string("attention_21_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_21_attention_1 = matmul(transpose_x = attention_21_attention_1_transpose_x_0, transpose_y = attention_21_attention_1_transpose_y_0, x = softmax_43, y = attention_21_slice_value_cache_heads_1)[name = string("attention_21_attention_1")]; int32 attention_21_concat_attention_all_heads_axis_0 = const()[name = string("attention_21_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_21_concat_attention_all_heads_interleave_0 = const()[name = string("attention_21_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_21_concat_attention_all_heads = concat(axis = attention_21_concat_attention_all_heads_axis_0, interleave = attention_21_concat_attention_all_heads_interleave_0, values = (attention_21_attention_0, attention_21_attention_1))[name = string("attention_21_concat_attention_all_heads")]; tensor attention_21_channels_first_retransposed_perm_0 = const()[name = string("attention_21_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_21_reshaped_shape_0 = const()[name = string("attention_21_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_21_channels_first_retransposed = transpose(perm = attention_21_channels_first_retransposed_perm_0, x = attention_21_concat_attention_all_heads)[name = 
string("transpose_5")]; tensor attention_21_reshaped = reshape(shape = attention_21_reshaped_shape_0, x = attention_21_channels_first_retransposed)[name = string("attention_21_reshaped")]; tensor attention_21_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535967360))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536569536))))[name = string("attention_21_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_101 = constexpr_blockwise_shift_scale(data = attention_21_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536598272))))[name = string("constexpr_blockwise_shift_scale_101")]; tensor attention_21_outproj_strides_0 = const()[name = string("attention_21_outproj_strides_0"), val = tensor([1])]; string attention_21_outproj_pad_type_0 = const()[name = string("attention_21_outproj_pad_type_0"), val = string("valid")]; tensor attention_21_outproj_pad_0 = const()[name = string("attention_21_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_21_outproj_dilations_0 = const()[name = string("attention_21_outproj_dilations_0"), val = tensor([1])]; int32 attention_21_outproj_groups_0 = const()[name = string("attention_21_outproj_groups_0"), val = int32(1)]; tensor attention_21_outproj = conv(dilations = attention_21_outproj_dilations_0, groups = attention_21_outproj_groups_0, pad = attention_21_outproj_pad_0, pad_type = attention_21_outproj_pad_type_0, strides = attention_21_outproj_strides_0, weight = constexpr_blockwise_shift_scale_101, x = attention_21_reshaped)[name = string("attention_21_outproj")]; tensor block_21_residual_1 = add(x = block_20_residual_2, y = attention_21_outproj)[name = string("block_21_residual_1")]; tensor block_21_ffn_rmsnorm_abs = abs(x = block_21_residual_1)[name = string("block_21_ffn_rmsnorm_abs")]; tensor block_21_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_21_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_21_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_21_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_21_ffn_rmsnorm_maxval = reduce_max(axes = block_21_ffn_rmsnorm_maxval_axes_0, keep_dims = block_21_ffn_rmsnorm_maxval_keep_dims_0, x = block_21_ffn_rmsnorm_abs)[name = string("block_21_ffn_rmsnorm_maxval")]; fp16 block_21_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_21_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_21_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_21_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_21_ffn_rmsnorm_maxval_clipped = clip(alpha = block_21_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_21_ffn_rmsnorm_maxval_clipped_beta_0, x = block_21_ffn_rmsnorm_maxval)[name = string("block_21_ffn_rmsnorm_maxval_clipped")]; tensor block_21_ffn_rmsnorm_scaled = real_div(x = block_21_residual_1, y = block_21_ffn_rmsnorm_maxval_clipped)[name = string("block_21_ffn_rmsnorm_scaled")]; tensor block_21_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_21_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_21_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_21_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_21_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_21_ffn_rmsnorm_squared_sum_axes_0, keep_dims = 
block_21_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_21_ffn_rmsnorm_scaled)[name = string("block_21_ffn_rmsnorm_squared_sum")]; fp16 block_21_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_21_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_21_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_21_ffn_rmsnorm_rsqrt_epsilon_0, x = block_21_ffn_rmsnorm_squared_sum)[name = string("block_21_ffn_rmsnorm_rsqrt")]; fp16 block_21_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_21_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_21_ffn_rmsnorm_dim_scaled = mul(x = block_21_ffn_rmsnorm_scaled, y = block_21_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_21_ffn_rmsnorm_dim_scaled")]; tensor block_21_ffn_rmsnorm_normalized = mul(x = block_21_ffn_rmsnorm_dim_scaled, y = block_21_ffn_rmsnorm_rsqrt)[name = string("block_21_ffn_rmsnorm_normalized")]; tensor block_21_ffn_rmsnorm_y_0 = const()[name = string("block_21_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536600128)))]; tensor block_21_ffn_rmsnorm = mul(x = block_21_ffn_rmsnorm_normalized, y = block_21_ffn_rmsnorm_y_0)[name = string("block_21_ffn_rmsnorm")]; tensor block_21_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(536601984))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539870656))))[name = string("block_21_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_102 = constexpr_blockwise_shift_scale(data = block_21_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540026368))))[name = string("constexpr_blockwise_shift_scale_102")]; tensor block_21_ffn_inproj_strides_0 = const()[name = string("block_21_ffn_inproj_strides_0"), val = tensor([1])]; string block_21_ffn_inproj_pad_type_0 = const()[name = string("block_21_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_21_ffn_inproj_pad_0 = const()[name = string("block_21_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_21_ffn_inproj_dilations_0 = const()[name = string("block_21_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_21_ffn_inproj_groups_0 = const()[name = string("block_21_ffn_inproj_groups_0"), val = int32(1)]; tensor block_21_ffn_inproj = conv(dilations = block_21_ffn_inproj_dilations_0, groups = block_21_ffn_inproj_groups_0, pad = block_21_ffn_inproj_pad_0, pad_type = block_21_ffn_inproj_pad_type_0, strides = block_21_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_102, x = block_21_ffn_rmsnorm)[name = string("block_21_ffn_inproj")]; tensor block_21_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(540036160))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543304832))))[name = string("block_21_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_103 = constexpr_blockwise_shift_scale(data = block_21_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543460544))))[name = string("constexpr_blockwise_shift_scale_103")]; tensor block_21_ffn_g_strides_0 = const()[name = string("block_21_ffn_g_strides_0"), val = tensor([1])]; string block_21_ffn_g_pad_type_0 = const()[name = 
string("block_21_ffn_g_pad_type_0"), val = string("valid")]; tensor block_21_ffn_g_pad_0 = const()[name = string("block_21_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_21_ffn_g_dilations_0 = const()[name = string("block_21_ffn_g_dilations_0"), val = tensor([1])]; int32 block_21_ffn_g_groups_0 = const()[name = string("block_21_ffn_g_groups_0"), val = int32(1)]; tensor block_21_ffn_g = conv(dilations = block_21_ffn_g_dilations_0, groups = block_21_ffn_g_groups_0, pad = block_21_ffn_g_pad_0, pad_type = block_21_ffn_g_pad_type_0, strides = block_21_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_103, x = block_21_ffn_rmsnorm)[name = string("block_21_ffn_g")]; tensor block_21_ffn_g_activation = silu(x = block_21_ffn_g)[name = string("block_21_ffn_g_activation")]; tensor block_21_ffn_x_gated = mul(x = block_21_ffn_inproj, y = block_21_ffn_g_activation)[name = string("block_21_ffn_x_gated")]; tensor block_21_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(543470336))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546739008))))[name = string("block_21_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_104 = constexpr_blockwise_shift_scale(data = block_21_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546767744))))[name = string("constexpr_blockwise_shift_scale_104")]; tensor block_21_ffn_outproj_strides_0 = const()[name = string("block_21_ffn_outproj_strides_0"), val = tensor([1])]; string block_21_ffn_outproj_pad_type_0 = const()[name = string("block_21_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_21_ffn_outproj_pad_0 = const()[name = string("block_21_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_21_ffn_outproj_dilations_0 = const()[name = string("block_21_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_21_ffn_outproj_groups_0 = const()[name = string("block_21_ffn_outproj_groups_0"), val = int32(1)]; tensor block_21_ffn_outproj = conv(dilations = block_21_ffn_outproj_dilations_0, groups = block_21_ffn_outproj_groups_0, pad = block_21_ffn_outproj_pad_0, pad_type = block_21_ffn_outproj_pad_type_0, strides = block_21_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_104, x = block_21_ffn_x_gated)[name = string("block_21_ffn_outproj")]; tensor block_21_residual_2 = add(x = block_21_ffn_outproj, y = block_21_residual_1)[name = string("block_21_residual_2")]; tensor block_22_attention_rmsnorm_abs = abs(x = block_21_residual_2)[name = string("block_22_attention_rmsnorm_abs")]; tensor block_22_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_22_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_22_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_22_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_22_attention_rmsnorm_maxval = reduce_max(axes = block_22_attention_rmsnorm_maxval_axes_0, keep_dims = block_22_attention_rmsnorm_maxval_keep_dims_0, x = block_22_attention_rmsnorm_abs)[name = string("block_22_attention_rmsnorm_maxval")]; fp16 block_22_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_22_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_22_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_22_attention_rmsnorm_maxval_clipped_beta_0"), val = 
fp16(inf)]; tensor block_22_attention_rmsnorm_maxval_clipped = clip(alpha = block_22_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_22_attention_rmsnorm_maxval_clipped_beta_0, x = block_22_attention_rmsnorm_maxval)[name = string("block_22_attention_rmsnorm_maxval_clipped")]; tensor block_22_attention_rmsnorm_scaled = real_div(x = block_21_residual_2, y = block_22_attention_rmsnorm_maxval_clipped)[name = string("block_22_attention_rmsnorm_scaled")]; tensor block_22_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_22_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_22_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_22_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_22_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_22_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_22_attention_rmsnorm_squared_sum_keep_dims_0, x = block_22_attention_rmsnorm_scaled)[name = string("block_22_attention_rmsnorm_squared_sum")]; fp16 block_22_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_22_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_22_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_22_attention_rmsnorm_rsqrt_epsilon_0, x = block_22_attention_rmsnorm_squared_sum)[name = string("block_22_attention_rmsnorm_rsqrt")]; fp16 block_22_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_22_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_22_attention_rmsnorm_dim_scaled = mul(x = block_22_attention_rmsnorm_scaled, y = block_22_attention_rmsnorm_dim_scaled_y_0)[name = string("block_22_attention_rmsnorm_dim_scaled")]; tensor block_22_attention_rmsnorm_normalized = mul(x = block_22_attention_rmsnorm_dim_scaled, y = block_22_attention_rmsnorm_rsqrt)[name = string("block_22_attention_rmsnorm_normalized")]; tensor block_22_attention_rmsnorm_y_0 = const()[name = string("block_22_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546769600)))]; tensor block_22_attention_rmsnorm = mul(x = block_22_attention_rmsnorm_normalized, y = block_22_attention_rmsnorm_y_0)[name = string("block_22_attention_rmsnorm")]; tensor attention_22_qkvproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(546771456))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547545664))))[name = string("attention_22_qkvproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_105 = constexpr_blockwise_shift_scale(data = attention_22_qkvproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547582592))))[name = string("constexpr_blockwise_shift_scale_105")]; tensor attention_22_qkvproj_bias_0 = const()[name = string("attention_22_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547584960)))]; tensor attention_22_qkvproj_strides_0 = const()[name = string("attention_22_qkvproj_strides_0"), val = tensor([1])]; string attention_22_qkvproj_pad_type_0 = const()[name = string("attention_22_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_22_qkvproj_pad_0 = const()[name = string("attention_22_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_22_qkvproj_dilations_0 = const()[name = string("attention_22_qkvproj_dilations_0"), val = 
tensor([1])]; int32 attention_22_qkvproj_groups_0 = const()[name = string("attention_22_qkvproj_groups_0"), val = int32(1)]; tensor attention_22_qkvproj = conv(bias = attention_22_qkvproj_bias_0, dilations = attention_22_qkvproj_dilations_0, groups = attention_22_qkvproj_groups_0, pad = attention_22_qkvproj_pad_0, pad_type = attention_22_qkvproj_pad_type_0, strides = attention_22_qkvproj_strides_0, weight = constexpr_blockwise_shift_scale_105, x = block_22_attention_rmsnorm)[name = string("attention_22_qkvproj")]; tensor attention_22_head_reshape_shape_0 = const()[name = string("attention_22_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_22_head_reshape = reshape(shape = attention_22_head_reshape_shape_0, x = attention_22_qkvproj)[name = string("attention_22_head_reshape")]; tensor attention_22_head_transpose_perm_0 = const()[name = string("attention_22_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_22_split_qkv_heads_axis_0 = const()[name = string("attention_22_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_22_split_qkv_heads_split_sizes_0 = const()[name = string("attention_22_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_22_head_transpose = transpose(perm = attention_22_head_transpose_perm_0, x = attention_22_head_reshape)[name = string("transpose_4")]; tensor attention_22_split_qkv_heads_0, tensor attention_22_split_qkv_heads_1, tensor attention_22_split_qkv_heads_2 = split(axis = attention_22_split_qkv_heads_axis_0, split_sizes = attention_22_split_qkv_heads_split_sizes_0, x = attention_22_head_transpose)[name = string("attention_22_split_qkv_heads")]; tensor attention_22_q_rope_lhs_mult = mul(x = attention_22_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_22_q_rope_lhs_mult")]; int32 attention_22_q_rotate_half_split_num_splits_0 = const()[name = string("attention_22_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_22_q_rotate_half_split_axis_0 = const()[name = string("attention_22_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_22_q_rotate_half_split_0, tensor attention_22_q_rotate_half_split_1 = split(axis = attention_22_q_rotate_half_split_axis_0, num_splits = attention_22_q_rotate_half_split_num_splits_0, x = attention_22_split_qkv_heads_0)[name = string("attention_22_q_rotate_half_split")]; fp16 attention_22_q_rotate_half_neg_y_0 = const()[name = string("attention_22_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_22_q_rotate_half_neg = mul(x = attention_22_q_rotate_half_split_1, y = attention_22_q_rotate_half_neg_y_0)[name = string("attention_22_q_rotate_half_neg")]; int32 attention_22_q_rotate_half_concat_axis_0 = const()[name = string("attention_22_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_22_q_rotate_half_concat_interleave_0 = const()[name = string("attention_22_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_22_q_rotate_half_concat = concat(axis = attention_22_q_rotate_half_concat_axis_0, interleave = attention_22_q_rotate_half_concat_interleave_0, values = (attention_22_q_rotate_half_neg, attention_22_q_rotate_half_split_0))[name = string("attention_22_q_rotate_half_concat")]; tensor attention_22_q_rope_rhs_mult = mul(x = attention_22_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_22_q_rope_rhs_mult")]; tensor attention_22_q_rope = add(x = attention_22_q_rope_lhs_mult, y = attention_22_q_rope_rhs_mult)[name = string("attention_22_q_rope")]; tensor 
attention_22_k_rope_lhs_mult = mul(x = attention_22_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_22_k_rope_lhs_mult")]; int32 attention_22_k_rotate_half_split_num_splits_0 = const()[name = string("attention_22_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_22_k_rotate_half_split_axis_0 = const()[name = string("attention_22_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_22_k_rotate_half_split_0, tensor attention_22_k_rotate_half_split_1 = split(axis = attention_22_k_rotate_half_split_axis_0, num_splits = attention_22_k_rotate_half_split_num_splits_0, x = attention_22_split_qkv_heads_1)[name = string("attention_22_k_rotate_half_split")]; fp16 attention_22_k_rotate_half_neg_y_0 = const()[name = string("attention_22_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_22_k_rotate_half_neg = mul(x = attention_22_k_rotate_half_split_1, y = attention_22_k_rotate_half_neg_y_0)[name = string("attention_22_k_rotate_half_neg")]; int32 attention_22_k_rotate_half_concat_axis_0 = const()[name = string("attention_22_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_22_k_rotate_half_concat_interleave_0 = const()[name = string("attention_22_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_22_k_rotate_half_concat = concat(axis = attention_22_k_rotate_half_concat_axis_0, interleave = attention_22_k_rotate_half_concat_interleave_0, values = (attention_22_k_rotate_half_neg, attention_22_k_rotate_half_split_0))[name = string("attention_22_k_rotate_half_concat")]; tensor attention_22_k_rope_rhs_mult = mul(x = attention_22_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_22_k_rope_rhs_mult")]; tensor attention_22_k_rope = add(x = attention_22_k_rope_lhs_mult, y = attention_22_k_rope_rhs_mult)[name = string("attention_22_k_rope")]; int32 attention_22_q_splits_axis_0 = const()[name = string("attention_22_q_splits_axis_0"), val = int32(1)]; int32 attention_22_q_splits_num_splits_0 = const()[name = string("attention_22_q_splits_num_splits_0"), val = int32(2)]; tensor attention_22_q_splits_0, tensor attention_22_q_splits_1 = split(axis = attention_22_q_splits_axis_0, num_splits = attention_22_q_splits_num_splits_0, x = attention_22_q_rope)[name = string("attention_22_q_splits")]; tensor attention_22_update_begin_0_values0_0 = const()[name = string("attention_22_update_begin_0_values0_0"), val = tensor([22])]; tensor attention_22_update_begin_0_values1_0 = const()[name = string("attention_22_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_22_update_begin_0_values3_0 = const()[name = string("attention_22_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_22_update_begin_0_axis_0 = const()[name = string("attention_22_update_begin_0_axis_0"), val = int32(0)]; bool attention_22_update_begin_0_interleave_0 = const()[name = string("attention_22_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_22_update_begin_0 = concat(axis = attention_22_update_begin_0_axis_0, interleave = attention_22_update_begin_0_interleave_0, values = (attention_22_update_begin_0_values0_0, attention_22_update_begin_0_values1_0, query_pos1, attention_22_update_begin_0_values3_0))[name = string("attention_22_update_begin_0")]; tensor attention_22_update_end_0_values0_0 = const()[name = string("attention_22_update_end_0_values0_0"), val = tensor([23])]; tensor attention_22_update_end_0_values1_0 = const()[name = string("attention_22_update_end_0_values1_0"), val = tensor([2])]; tensor 
attention_22_update_end_0_values3_0 = const()[name = string("attention_22_update_end_0_values3_0"), val = tensor([64])]; int32 attention_22_update_end_0_axis_0 = const()[name = string("attention_22_update_end_0_axis_0"), val = int32(0)]; bool attention_22_update_end_0_interleave_0 = const()[name = string("attention_22_update_end_0_interleave_0"), val = bool(false)]; tensor attention_22_update_end_0 = concat(axis = attention_22_update_end_0_axis_0, interleave = attention_22_update_end_0_interleave_0, values = (attention_22_update_end_0_values0_0, attention_22_update_end_0_values1_0, end_pos_0, attention_22_update_end_0_values3_0))[name = string("attention_22_update_end_0")]; tensor attention_22_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_22_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_22_updated_key_cache_0 = slice_update(begin = attention_22_update_begin_0, end = attention_22_update_end_0, squeeze_mask = attention_22_updated_key_cache_0_squeeze_mask_0, update = attention_22_k_rope, x = coreml_update_state_42)[name = string("attention_22_updated_key_cache_0")]; write_state(data = attention_22_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_92_write_state")]; tensor coreml_update_state_44 = read_state(input = key_cache_state)[name = string("coreml_update_state_92")]; tensor attention_22_key_cache_begin_0 = const()[name = string("attention_22_key_cache_begin_0"), val = tensor([22, 0, 0, 0])]; tensor attention_22_key_cache_end_0 = const()[name = string("attention_22_key_cache_end_0"), val = tensor([23, 2, 512, 64])]; tensor attention_22_key_cache_squeeze_mask_0 = const()[name = string("attention_22_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_22_key_cache = slice_by_index(begin = attention_22_key_cache_begin_0, end = attention_22_key_cache_end_0, squeeze_mask = attention_22_key_cache_squeeze_mask_0, x = coreml_update_state_44)[name = string("attention_22_key_cache")]; int32 attention_22_key_cache_head_axis_0 = const()[name = string("attention_22_key_cache_head_axis_0"), val = int32(1)]; int32 attention_22_key_cache_head_num_splits_0 = const()[name = string("attention_22_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_22_key_cache_head_0, tensor attention_22_key_cache_head_1 = split(axis = attention_22_key_cache_head_axis_0, num_splits = attention_22_key_cache_head_num_splits_0, x = attention_22_key_cache)[name = string("attention_22_key_cache_head")]; tensor attention_22_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_22_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_22_updated_value_cache_0 = slice_update(begin = attention_22_update_begin_0, end = attention_22_update_end_0, squeeze_mask = attention_22_updated_value_cache_0_squeeze_mask_0, update = attention_22_split_qkv_heads_2, x = coreml_update_state_43)[name = string("attention_22_updated_value_cache_0")]; write_state(data = attention_22_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_93_write_state")]; tensor coreml_update_state_45 = read_state(input = value_cache_state)[name = string("coreml_update_state_93")]; tensor attention_22_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_22_slice_current_layer_value_cache_begin_0"), val = tensor([22, 0, 0, 0])]; tensor attention_22_slice_current_layer_value_cache_end_0 = const()[name 
= string("attention_22_slice_current_layer_value_cache_end_0"), val = tensor([23, 2, 512, 64])]; tensor attention_22_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_22_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_22_slice_current_layer_value_cache = slice_by_index(begin = attention_22_slice_current_layer_value_cache_begin_0, end = attention_22_slice_current_layer_value_cache_end_0, squeeze_mask = attention_22_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_45)[name = string("attention_22_slice_current_layer_value_cache")]; int32 attention_22_slice_value_cache_heads_axis_0 = const()[name = string("attention_22_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_22_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_22_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_22_slice_value_cache_heads_0, tensor attention_22_slice_value_cache_heads_1 = split(axis = attention_22_slice_value_cache_heads_axis_0, num_splits = attention_22_slice_value_cache_heads_num_splits_0, x = attention_22_slice_current_layer_value_cache)[name = string("attention_22_slice_value_cache_heads")]; bool attention_22_scores_0_transpose_y_0 = const()[name = string("attention_22_scores_0_transpose_y_0"), val = bool(true)]; bool attention_22_scores_0_transpose_x_0 = const()[name = string("attention_22_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_22_scores_0 = matmul(transpose_x = attention_22_scores_0_transpose_x_0, transpose_y = attention_22_scores_0_transpose_y_0, x = attention_22_key_cache_head_0, y = attention_22_q_splits_0)[name = string("attention_22_scores_0")]; fp16 attention_22_scaled_scores_0_y_0 = const()[name = string("attention_22_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_22_scaled_scores_0 = mul(x = attention_22_scores_0, y = attention_22_scaled_scores_0_y_0)[name = string("attention_22_scaled_scores_0")]; tensor attention_22_masked_scaled_scores_0 = add(x = attention_22_scaled_scores_0, y = transpose_0)[name = string("attention_22_masked_scaled_scores_0")]; int32 softmax_44_axis_0 = const()[name = string("softmax_44_axis_0"), val = int32(-2)]; tensor softmax_44 = softmax(axis = softmax_44_axis_0, x = attention_22_masked_scaled_scores_0)[name = string("softmax_44")]; bool attention_22_attention_0_transpose_x_0 = const()[name = string("attention_22_attention_0_transpose_x_0"), val = bool(true)]; bool attention_22_attention_0_transpose_y_0 = const()[name = string("attention_22_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_22_attention_0 = matmul(transpose_x = attention_22_attention_0_transpose_x_0, transpose_y = attention_22_attention_0_transpose_y_0, x = softmax_44, y = attention_22_slice_value_cache_heads_0)[name = string("attention_22_attention_0")]; bool attention_22_scores_1_transpose_y_0 = const()[name = string("attention_22_scores_1_transpose_y_0"), val = bool(true)]; bool attention_22_scores_1_transpose_x_0 = const()[name = string("attention_22_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_22_scores_1 = matmul(transpose_x = attention_22_scores_1_transpose_x_0, transpose_y = attention_22_scores_1_transpose_y_0, x = attention_22_key_cache_head_1, y = attention_22_q_splits_1)[name = string("attention_22_scores_1")]; fp16 attention_22_scaled_scores_1_y_0 = const()[name = string("attention_22_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor 
attention_22_scaled_scores_1 = mul(x = attention_22_scores_1, y = attention_22_scaled_scores_1_y_0)[name = string("attention_22_scaled_scores_1")]; tensor attention_22_masked_scaled_scores_1 = add(x = attention_22_scaled_scores_1, y = transpose_0)[name = string("attention_22_masked_scaled_scores_1")]; int32 softmax_45_axis_0 = const()[name = string("softmax_45_axis_0"), val = int32(-2)]; tensor softmax_45 = softmax(axis = softmax_45_axis_0, x = attention_22_masked_scaled_scores_1)[name = string("softmax_45")]; bool attention_22_attention_1_transpose_x_0 = const()[name = string("attention_22_attention_1_transpose_x_0"), val = bool(true)]; bool attention_22_attention_1_transpose_y_0 = const()[name = string("attention_22_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_22_attention_1 = matmul(transpose_x = attention_22_attention_1_transpose_x_0, transpose_y = attention_22_attention_1_transpose_y_0, x = softmax_45, y = attention_22_slice_value_cache_heads_1)[name = string("attention_22_attention_1")]; int32 attention_22_concat_attention_all_heads_axis_0 = const()[name = string("attention_22_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_22_concat_attention_all_heads_interleave_0 = const()[name = string("attention_22_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_22_concat_attention_all_heads = concat(axis = attention_22_concat_attention_all_heads_axis_0, interleave = attention_22_concat_attention_all_heads_interleave_0, values = (attention_22_attention_0, attention_22_attention_1))[name = string("attention_22_concat_attention_all_heads")]; tensor attention_22_channels_first_retransposed_perm_0 = const()[name = string("attention_22_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_22_reshaped_shape_0 = const()[name = string("attention_22_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_22_channels_first_retransposed = transpose(perm = attention_22_channels_first_retransposed_perm_0, x = attention_22_concat_attention_all_heads)[name = string("transpose_3")]; tensor attention_22_reshaped = reshape(shape = attention_22_reshaped_shape_0, x = attention_22_channels_first_retransposed)[name = string("attention_22_reshaped")]; tensor attention_22_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547587328))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548189504))))[name = string("attention_22_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_106 = constexpr_blockwise_shift_scale(data = attention_22_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548218240))))[name = string("constexpr_blockwise_shift_scale_106")]; tensor attention_22_outproj_strides_0 = const()[name = string("attention_22_outproj_strides_0"), val = tensor([1])]; string attention_22_outproj_pad_type_0 = const()[name = string("attention_22_outproj_pad_type_0"), val = string("valid")]; tensor attention_22_outproj_pad_0 = const()[name = string("attention_22_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_22_outproj_dilations_0 = const()[name = string("attention_22_outproj_dilations_0"), val = tensor([1])]; int32 attention_22_outproj_groups_0 = const()[name = string("attention_22_outproj_groups_0"), val = int32(1)]; tensor attention_22_outproj = conv(dilations = 
attention_22_outproj_dilations_0, groups = attention_22_outproj_groups_0, pad = attention_22_outproj_pad_0, pad_type = attention_22_outproj_pad_type_0, strides = attention_22_outproj_strides_0, weight = constexpr_blockwise_shift_scale_106, x = attention_22_reshaped)[name = string("attention_22_outproj")]; tensor block_22_residual_1 = add(x = block_21_residual_2, y = attention_22_outproj)[name = string("block_22_residual_1")]; tensor block_22_ffn_rmsnorm_abs = abs(x = block_22_residual_1)[name = string("block_22_ffn_rmsnorm_abs")]; tensor block_22_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_22_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_22_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_22_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_22_ffn_rmsnorm_maxval = reduce_max(axes = block_22_ffn_rmsnorm_maxval_axes_0, keep_dims = block_22_ffn_rmsnorm_maxval_keep_dims_0, x = block_22_ffn_rmsnorm_abs)[name = string("block_22_ffn_rmsnorm_maxval")]; fp16 block_22_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_22_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_22_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_22_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_22_ffn_rmsnorm_maxval_clipped = clip(alpha = block_22_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_22_ffn_rmsnorm_maxval_clipped_beta_0, x = block_22_ffn_rmsnorm_maxval)[name = string("block_22_ffn_rmsnorm_maxval_clipped")]; tensor block_22_ffn_rmsnorm_scaled = real_div(x = block_22_residual_1, y = block_22_ffn_rmsnorm_maxval_clipped)[name = string("block_22_ffn_rmsnorm_scaled")]; tensor block_22_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_22_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_22_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_22_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_22_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_22_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_22_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_22_ffn_rmsnorm_scaled)[name = string("block_22_ffn_rmsnorm_squared_sum")]; fp16 block_22_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_22_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_22_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_22_ffn_rmsnorm_rsqrt_epsilon_0, x = block_22_ffn_rmsnorm_squared_sum)[name = string("block_22_ffn_rmsnorm_rsqrt")]; fp16 block_22_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_22_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_22_ffn_rmsnorm_dim_scaled = mul(x = block_22_ffn_rmsnorm_scaled, y = block_22_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_22_ffn_rmsnorm_dim_scaled")]; tensor block_22_ffn_rmsnorm_normalized = mul(x = block_22_ffn_rmsnorm_dim_scaled, y = block_22_ffn_rmsnorm_rsqrt)[name = string("block_22_ffn_rmsnorm_normalized")]; tensor block_22_ffn_rmsnorm_y_0 = const()[name = string("block_22_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548220096)))]; tensor block_22_ffn_rmsnorm = mul(x = block_22_ffn_rmsnorm_normalized, y = block_22_ffn_rmsnorm_y_0)[name = string("block_22_ffn_rmsnorm")]; tensor block_22_ffn_inproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548221952))), lut = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(551490624))))[name = string("block_22_ffn_inproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_107 = constexpr_blockwise_shift_scale(data = block_22_ffn_inproj_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551646336))))[name = string("constexpr_blockwise_shift_scale_107")]; tensor block_22_ffn_inproj_strides_0 = const()[name = string("block_22_ffn_inproj_strides_0"), val = tensor([1])]; string block_22_ffn_inproj_pad_type_0 = const()[name = string("block_22_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_22_ffn_inproj_pad_0 = const()[name = string("block_22_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_22_ffn_inproj_dilations_0 = const()[name = string("block_22_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_22_ffn_inproj_groups_0 = const()[name = string("block_22_ffn_inproj_groups_0"), val = int32(1)]; tensor block_22_ffn_inproj = conv(dilations = block_22_ffn_inproj_dilations_0, groups = block_22_ffn_inproj_groups_0, pad = block_22_ffn_inproj_pad_0, pad_type = block_22_ffn_inproj_pad_type_0, strides = block_22_ffn_inproj_strides_0, weight = constexpr_blockwise_shift_scale_107, x = block_22_ffn_rmsnorm)[name = string("block_22_ffn_inproj")]; tensor block_22_ffn_g_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(551656128))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(554924800))))[name = string("block_22_ffn_g_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_108 = constexpr_blockwise_shift_scale(data = block_22_ffn_g_weight_dequantization, scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555080512))))[name = string("constexpr_blockwise_shift_scale_108")]; tensor block_22_ffn_g_strides_0 = const()[name = string("block_22_ffn_g_strides_0"), val = tensor([1])]; string block_22_ffn_g_pad_type_0 = const()[name = string("block_22_ffn_g_pad_type_0"), val = string("valid")]; tensor block_22_ffn_g_pad_0 = const()[name = string("block_22_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_22_ffn_g_dilations_0 = const()[name = string("block_22_ffn_g_dilations_0"), val = tensor([1])]; int32 block_22_ffn_g_groups_0 = const()[name = string("block_22_ffn_g_groups_0"), val = int32(1)]; tensor block_22_ffn_g = conv(dilations = block_22_ffn_g_dilations_0, groups = block_22_ffn_g_groups_0, pad = block_22_ffn_g_pad_0, pad_type = block_22_ffn_g_pad_type_0, strides = block_22_ffn_g_strides_0, weight = constexpr_blockwise_shift_scale_108, x = block_22_ffn_rmsnorm)[name = string("block_22_ffn_g")]; tensor block_22_ffn_g_activation = silu(x = block_22_ffn_g)[name = string("block_22_ffn_g_activation")]; tensor block_22_ffn_x_gated = mul(x = block_22_ffn_inproj, y = block_22_ffn_g_activation)[name = string("block_22_ffn_x_gated")]; tensor block_22_ffn_outproj_weight_dequantization = constexpr_lut_to_dense(indices = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(555090304))), lut = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558358976))))[name = string("block_22_ffn_outproj_weight_dequantization")]; tensor constexpr_blockwise_shift_scale_109 = constexpr_blockwise_shift_scale(data = block_22_ffn_outproj_weight_dequantization, scale = tensor(BLOBFILE(path = 
string("@model_path/weights/weight.bin"), offset = uint64(558387712))))[name = string("constexpr_blockwise_shift_scale_109")]; tensor block_22_ffn_outproj_strides_0 = const()[name = string("block_22_ffn_outproj_strides_0"), val = tensor([1])]; string block_22_ffn_outproj_pad_type_0 = const()[name = string("block_22_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_22_ffn_outproj_pad_0 = const()[name = string("block_22_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_22_ffn_outproj_dilations_0 = const()[name = string("block_22_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_22_ffn_outproj_groups_0 = const()[name = string("block_22_ffn_outproj_groups_0"), val = int32(1)]; tensor block_22_ffn_outproj = conv(dilations = block_22_ffn_outproj_dilations_0, groups = block_22_ffn_outproj_groups_0, pad = block_22_ffn_outproj_pad_0, pad_type = block_22_ffn_outproj_pad_type_0, strides = block_22_ffn_outproj_strides_0, weight = constexpr_blockwise_shift_scale_109, x = block_22_ffn_x_gated)[name = string("block_22_ffn_outproj")]; tensor block_22_residual_2 = add(x = block_22_ffn_outproj, y = block_22_residual_1)[name = string("block_22_residual_2")]; tensor block_23_attention_rmsnorm_abs = abs(x = block_22_residual_2)[name = string("block_23_attention_rmsnorm_abs")]; tensor block_23_attention_rmsnorm_maxval_axes_0 = const()[name = string("block_23_attention_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_23_attention_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_23_attention_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_23_attention_rmsnorm_maxval = reduce_max(axes = block_23_attention_rmsnorm_maxval_axes_0, keep_dims = block_23_attention_rmsnorm_maxval_keep_dims_0, x = block_23_attention_rmsnorm_abs)[name = string("block_23_attention_rmsnorm_maxval")]; fp16 block_23_attention_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_23_attention_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_23_attention_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_23_attention_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_23_attention_rmsnorm_maxval_clipped = clip(alpha = block_23_attention_rmsnorm_maxval_clipped_alpha_0, beta = block_23_attention_rmsnorm_maxval_clipped_beta_0, x = block_23_attention_rmsnorm_maxval)[name = string("block_23_attention_rmsnorm_maxval_clipped")]; tensor block_23_attention_rmsnorm_scaled = real_div(x = block_22_residual_2, y = block_23_attention_rmsnorm_maxval_clipped)[name = string("block_23_attention_rmsnorm_scaled")]; tensor block_23_attention_rmsnorm_squared_sum_axes_0 = const()[name = string("block_23_attention_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_23_attention_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_23_attention_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_23_attention_rmsnorm_squared_sum = reduce_sum_square(axes = block_23_attention_rmsnorm_squared_sum_axes_0, keep_dims = block_23_attention_rmsnorm_squared_sum_keep_dims_0, x = block_23_attention_rmsnorm_scaled)[name = string("block_23_attention_rmsnorm_squared_sum")]; fp16 block_23_attention_rmsnorm_rsqrt_epsilon_0 = const()[name = string("block_23_attention_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_23_attention_rmsnorm_rsqrt = rsqrt(epsilon = block_23_attention_rmsnorm_rsqrt_epsilon_0, x = block_23_attention_rmsnorm_squared_sum)[name = string("block_23_attention_rmsnorm_rsqrt")]; fp16 
block_23_attention_rmsnorm_dim_scaled_y_0 = const()[name = string("block_23_attention_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_23_attention_rmsnorm_dim_scaled = mul(x = block_23_attention_rmsnorm_scaled, y = block_23_attention_rmsnorm_dim_scaled_y_0)[name = string("block_23_attention_rmsnorm_dim_scaled")]; tensor block_23_attention_rmsnorm_normalized = mul(x = block_23_attention_rmsnorm_dim_scaled, y = block_23_attention_rmsnorm_rsqrt)[name = string("block_23_attention_rmsnorm_normalized")]; tensor block_23_attention_rmsnorm_y_0 = const()[name = string("block_23_attention_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558389568)))]; tensor block_23_attention_rmsnorm = mul(x = block_23_attention_rmsnorm_normalized, y = block_23_attention_rmsnorm_y_0)[name = string("block_23_attention_rmsnorm")]; tensor attention_23_qkvproj_weight_0 = const()[name = string("attention_23_qkvproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558391424)))]; tensor attention_23_qkvproj_bias_0 = const()[name = string("attention_23_qkvproj_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560455872)))]; tensor attention_23_qkvproj_strides_0 = const()[name = string("attention_23_qkvproj_strides_0"), val = tensor([1])]; string attention_23_qkvproj_pad_type_0 = const()[name = string("attention_23_qkvproj_pad_type_0"), val = string("valid")]; tensor attention_23_qkvproj_pad_0 = const()[name = string("attention_23_qkvproj_pad_0"), val = tensor([0, 0])]; tensor attention_23_qkvproj_dilations_0 = const()[name = string("attention_23_qkvproj_dilations_0"), val = tensor([1])]; int32 attention_23_qkvproj_groups_0 = const()[name = string("attention_23_qkvproj_groups_0"), val = int32(1)]; tensor attention_23_qkvproj = conv(bias = attention_23_qkvproj_bias_0, dilations = attention_23_qkvproj_dilations_0, groups = attention_23_qkvproj_groups_0, pad = attention_23_qkvproj_pad_0, pad_type = attention_23_qkvproj_pad_type_0, strides = attention_23_qkvproj_strides_0, weight = attention_23_qkvproj_weight_0, x = block_23_attention_rmsnorm)[name = string("attention_23_qkvproj")]; tensor attention_23_head_reshape_shape_0 = const()[name = string("attention_23_head_reshape_shape_0"), val = tensor([1, 18, 64, 64])]; tensor attention_23_head_reshape = reshape(shape = attention_23_head_reshape_shape_0, x = attention_23_qkvproj)[name = string("attention_23_head_reshape")]; tensor attention_23_head_transpose_perm_0 = const()[name = string("attention_23_head_transpose_perm_0"), val = tensor([0, 1, 3, 2])]; int32 attention_23_split_qkv_heads_axis_0 = const()[name = string("attention_23_split_qkv_heads_axis_0"), val = int32(1)]; tensor attention_23_split_qkv_heads_split_sizes_0 = const()[name = string("attention_23_split_qkv_heads_split_sizes_0"), val = tensor([14, 2, 2])]; tensor attention_23_head_transpose = transpose(perm = attention_23_head_transpose_perm_0, x = attention_23_head_reshape)[name = string("transpose_2")]; tensor attention_23_split_qkv_heads_0, tensor attention_23_split_qkv_heads_1, tensor attention_23_split_qkv_heads_2 = split(axis = attention_23_split_qkv_heads_axis_0, split_sizes = attention_23_split_qkv_heads_split_sizes_0, x = attention_23_head_transpose)[name = string("attention_23_split_qkv_heads")]; tensor attention_23_q_rope_lhs_mult = mul(x = attention_23_split_qkv_heads_0, y = query_cos_emb)[name = string("attention_23_q_rope_lhs_mult")]; int32 
attention_23_q_rotate_half_split_num_splits_0 = const()[name = string("attention_23_q_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_23_q_rotate_half_split_axis_0 = const()[name = string("attention_23_q_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_23_q_rotate_half_split_0, tensor attention_23_q_rotate_half_split_1 = split(axis = attention_23_q_rotate_half_split_axis_0, num_splits = attention_23_q_rotate_half_split_num_splits_0, x = attention_23_split_qkv_heads_0)[name = string("attention_23_q_rotate_half_split")]; fp16 attention_23_q_rotate_half_neg_y_0 = const()[name = string("attention_23_q_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_23_q_rotate_half_neg = mul(x = attention_23_q_rotate_half_split_1, y = attention_23_q_rotate_half_neg_y_0)[name = string("attention_23_q_rotate_half_neg")]; int32 attention_23_q_rotate_half_concat_axis_0 = const()[name = string("attention_23_q_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_23_q_rotate_half_concat_interleave_0 = const()[name = string("attention_23_q_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_23_q_rotate_half_concat = concat(axis = attention_23_q_rotate_half_concat_axis_0, interleave = attention_23_q_rotate_half_concat_interleave_0, values = (attention_23_q_rotate_half_neg, attention_23_q_rotate_half_split_0))[name = string("attention_23_q_rotate_half_concat")]; tensor attention_23_q_rope_rhs_mult = mul(x = attention_23_q_rotate_half_concat, y = query_sin_emb)[name = string("attention_23_q_rope_rhs_mult")]; tensor attention_23_q_rope = add(x = attention_23_q_rope_lhs_mult, y = attention_23_q_rope_rhs_mult)[name = string("attention_23_q_rope")]; tensor attention_23_k_rope_lhs_mult = mul(x = attention_23_split_qkv_heads_1, y = query_cos_emb)[name = string("attention_23_k_rope_lhs_mult")]; int32 attention_23_k_rotate_half_split_num_splits_0 = const()[name = string("attention_23_k_rotate_half_split_num_splits_0"), val = int32(2)]; int32 attention_23_k_rotate_half_split_axis_0 = const()[name = string("attention_23_k_rotate_half_split_axis_0"), val = int32(3)]; tensor attention_23_k_rotate_half_split_0, tensor attention_23_k_rotate_half_split_1 = split(axis = attention_23_k_rotate_half_split_axis_0, num_splits = attention_23_k_rotate_half_split_num_splits_0, x = attention_23_split_qkv_heads_1)[name = string("attention_23_k_rotate_half_split")]; fp16 attention_23_k_rotate_half_neg_y_0 = const()[name = string("attention_23_k_rotate_half_neg_y_0"), val = fp16(-0x1p+0)]; tensor attention_23_k_rotate_half_neg = mul(x = attention_23_k_rotate_half_split_1, y = attention_23_k_rotate_half_neg_y_0)[name = string("attention_23_k_rotate_half_neg")]; int32 attention_23_k_rotate_half_concat_axis_0 = const()[name = string("attention_23_k_rotate_half_concat_axis_0"), val = int32(3)]; bool attention_23_k_rotate_half_concat_interleave_0 = const()[name = string("attention_23_k_rotate_half_concat_interleave_0"), val = bool(false)]; tensor attention_23_k_rotate_half_concat = concat(axis = attention_23_k_rotate_half_concat_axis_0, interleave = attention_23_k_rotate_half_concat_interleave_0, values = (attention_23_k_rotate_half_neg, attention_23_k_rotate_half_split_0))[name = string("attention_23_k_rotate_half_concat")]; tensor attention_23_k_rope_rhs_mult = mul(x = attention_23_k_rotate_half_concat, y = query_sin_emb)[name = string("attention_23_k_rope_rhs_mult")]; tensor attention_23_k_rope = add(x = attention_23_k_rope_lhs_mult, y = attention_23_k_rope_rhs_mult)[name = 
string("attention_23_k_rope")]; int32 attention_23_q_splits_axis_0 = const()[name = string("attention_23_q_splits_axis_0"), val = int32(1)]; int32 attention_23_q_splits_num_splits_0 = const()[name = string("attention_23_q_splits_num_splits_0"), val = int32(2)]; tensor attention_23_q_splits_0, tensor attention_23_q_splits_1 = split(axis = attention_23_q_splits_axis_0, num_splits = attention_23_q_splits_num_splits_0, x = attention_23_q_rope)[name = string("attention_23_q_splits")]; tensor attention_23_update_begin_0_values0_0 = const()[name = string("attention_23_update_begin_0_values0_0"), val = tensor([23])]; tensor attention_23_update_begin_0_values1_0 = const()[name = string("attention_23_update_begin_0_values1_0"), val = tensor([0])]; tensor attention_23_update_begin_0_values3_0 = const()[name = string("attention_23_update_begin_0_values3_0"), val = tensor([0])]; int32 attention_23_update_begin_0_axis_0 = const()[name = string("attention_23_update_begin_0_axis_0"), val = int32(0)]; bool attention_23_update_begin_0_interleave_0 = const()[name = string("attention_23_update_begin_0_interleave_0"), val = bool(false)]; tensor attention_23_update_begin_0 = concat(axis = attention_23_update_begin_0_axis_0, interleave = attention_23_update_begin_0_interleave_0, values = (attention_23_update_begin_0_values0_0, attention_23_update_begin_0_values1_0, query_pos1, attention_23_update_begin_0_values3_0))[name = string("attention_23_update_begin_0")]; tensor attention_23_update_end_0_values0_0 = const()[name = string("attention_23_update_end_0_values0_0"), val = tensor([24])]; tensor attention_23_update_end_0_values1_0 = const()[name = string("attention_23_update_end_0_values1_0"), val = tensor([2])]; tensor attention_23_update_end_0_values3_0 = const()[name = string("attention_23_update_end_0_values3_0"), val = tensor([64])]; int32 attention_23_update_end_0_axis_0 = const()[name = string("attention_23_update_end_0_axis_0"), val = int32(0)]; bool attention_23_update_end_0_interleave_0 = const()[name = string("attention_23_update_end_0_interleave_0"), val = bool(false)]; tensor attention_23_update_end_0 = concat(axis = attention_23_update_end_0_axis_0, interleave = attention_23_update_end_0_interleave_0, values = (attention_23_update_end_0_values0_0, attention_23_update_end_0_values1_0, end_pos_0, attention_23_update_end_0_values3_0))[name = string("attention_23_update_end_0")]; tensor attention_23_updated_key_cache_0_squeeze_mask_0 = const()[name = string("attention_23_updated_key_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_23_updated_key_cache_0 = slice_update(begin = attention_23_update_begin_0, end = attention_23_update_end_0, squeeze_mask = attention_23_updated_key_cache_0_squeeze_mask_0, update = attention_23_k_rope, x = coreml_update_state_44)[name = string("attention_23_updated_key_cache_0")]; write_state(data = attention_23_updated_key_cache_0, input = key_cache_state)[name = string("coreml_update_state_94_write_state")]; tensor coreml_update_state_46 = read_state(input = key_cache_state)[name = string("coreml_update_state_94")]; tensor attention_23_key_cache_begin_0 = const()[name = string("attention_23_key_cache_begin_0"), val = tensor([23, 0, 0, 0])]; tensor attention_23_key_cache_end_0 = const()[name = string("attention_23_key_cache_end_0"), val = tensor([24, 2, 512, 64])]; tensor attention_23_key_cache_squeeze_mask_0 = const()[name = string("attention_23_key_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor 
attention_23_key_cache = slice_by_index(begin = attention_23_key_cache_begin_0, end = attention_23_key_cache_end_0, squeeze_mask = attention_23_key_cache_squeeze_mask_0, x = coreml_update_state_46)[name = string("attention_23_key_cache")]; int32 attention_23_key_cache_head_axis_0 = const()[name = string("attention_23_key_cache_head_axis_0"), val = int32(1)]; int32 attention_23_key_cache_head_num_splits_0 = const()[name = string("attention_23_key_cache_head_num_splits_0"), val = int32(2)]; tensor attention_23_key_cache_head_0, tensor attention_23_key_cache_head_1 = split(axis = attention_23_key_cache_head_axis_0, num_splits = attention_23_key_cache_head_num_splits_0, x = attention_23_key_cache)[name = string("attention_23_key_cache_head")]; tensor attention_23_updated_value_cache_0_squeeze_mask_0 = const()[name = string("attention_23_updated_value_cache_0_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_23_updated_value_cache_0 = slice_update(begin = attention_23_update_begin_0, end = attention_23_update_end_0, squeeze_mask = attention_23_updated_value_cache_0_squeeze_mask_0, update = attention_23_split_qkv_heads_2, x = coreml_update_state_45)[name = string("attention_23_updated_value_cache_0")]; write_state(data = attention_23_updated_value_cache_0, input = value_cache_state)[name = string("coreml_update_state_95_write_state")]; tensor coreml_update_state_47 = read_state(input = value_cache_state)[name = string("coreml_update_state_95")]; tensor attention_23_slice_current_layer_value_cache_begin_0 = const()[name = string("attention_23_slice_current_layer_value_cache_begin_0"), val = tensor([23, 0, 0, 0])]; tensor attention_23_slice_current_layer_value_cache_end_0 = const()[name = string("attention_23_slice_current_layer_value_cache_end_0"), val = tensor([24, 2, 512, 64])]; tensor attention_23_slice_current_layer_value_cache_squeeze_mask_0 = const()[name = string("attention_23_slice_current_layer_value_cache_squeeze_mask_0"), val = tensor([false, false, false, false])]; tensor attention_23_slice_current_layer_value_cache = slice_by_index(begin = attention_23_slice_current_layer_value_cache_begin_0, end = attention_23_slice_current_layer_value_cache_end_0, squeeze_mask = attention_23_slice_current_layer_value_cache_squeeze_mask_0, x = coreml_update_state_47)[name = string("attention_23_slice_current_layer_value_cache")]; int32 attention_23_slice_value_cache_heads_axis_0 = const()[name = string("attention_23_slice_value_cache_heads_axis_0"), val = int32(1)]; int32 attention_23_slice_value_cache_heads_num_splits_0 = const()[name = string("attention_23_slice_value_cache_heads_num_splits_0"), val = int32(2)]; tensor attention_23_slice_value_cache_heads_0, tensor attention_23_slice_value_cache_heads_1 = split(axis = attention_23_slice_value_cache_heads_axis_0, num_splits = attention_23_slice_value_cache_heads_num_splits_0, x = attention_23_slice_current_layer_value_cache)[name = string("attention_23_slice_value_cache_heads")]; bool attention_23_scores_0_transpose_y_0 = const()[name = string("attention_23_scores_0_transpose_y_0"), val = bool(true)]; bool attention_23_scores_0_transpose_x_0 = const()[name = string("attention_23_scores_0_transpose_x_0"), val = bool(false)]; tensor attention_23_scores_0 = matmul(transpose_x = attention_23_scores_0_transpose_x_0, transpose_y = attention_23_scores_0_transpose_y_0, x = attention_23_key_cache_head_0, y = attention_23_q_splits_0)[name = string("attention_23_scores_0")]; fp16 attention_23_scaled_scores_0_y_0 = const()[name = 
string("attention_23_scaled_scores_0_y_0"), val = fp16(0x1p-3)]; tensor attention_23_scaled_scores_0 = mul(x = attention_23_scores_0, y = attention_23_scaled_scores_0_y_0)[name = string("attention_23_scaled_scores_0")]; tensor attention_23_masked_scaled_scores_0 = add(x = attention_23_scaled_scores_0, y = transpose_0)[name = string("attention_23_masked_scaled_scores_0")]; int32 softmax_46_axis_0 = const()[name = string("softmax_46_axis_0"), val = int32(-2)]; tensor softmax_46 = softmax(axis = softmax_46_axis_0, x = attention_23_masked_scaled_scores_0)[name = string("softmax_46")]; bool attention_23_attention_0_transpose_x_0 = const()[name = string("attention_23_attention_0_transpose_x_0"), val = bool(true)]; bool attention_23_attention_0_transpose_y_0 = const()[name = string("attention_23_attention_0_transpose_y_0"), val = bool(false)]; tensor attention_23_attention_0 = matmul(transpose_x = attention_23_attention_0_transpose_x_0, transpose_y = attention_23_attention_0_transpose_y_0, x = softmax_46, y = attention_23_slice_value_cache_heads_0)[name = string("attention_23_attention_0")]; bool attention_23_scores_1_transpose_y_0 = const()[name = string("attention_23_scores_1_transpose_y_0"), val = bool(true)]; bool attention_23_scores_1_transpose_x_0 = const()[name = string("attention_23_scores_1_transpose_x_0"), val = bool(false)]; tensor attention_23_scores_1 = matmul(transpose_x = attention_23_scores_1_transpose_x_0, transpose_y = attention_23_scores_1_transpose_y_0, x = attention_23_key_cache_head_1, y = attention_23_q_splits_1)[name = string("attention_23_scores_1")]; fp16 attention_23_scaled_scores_1_y_0 = const()[name = string("attention_23_scaled_scores_1_y_0"), val = fp16(0x1p-3)]; tensor attention_23_scaled_scores_1 = mul(x = attention_23_scores_1, y = attention_23_scaled_scores_1_y_0)[name = string("attention_23_scaled_scores_1")]; tensor attention_23_masked_scaled_scores_1 = add(x = attention_23_scaled_scores_1, y = transpose_0)[name = string("attention_23_masked_scaled_scores_1")]; int32 softmax_47_axis_0 = const()[name = string("softmax_47_axis_0"), val = int32(-2)]; tensor softmax_47 = softmax(axis = softmax_47_axis_0, x = attention_23_masked_scaled_scores_1)[name = string("softmax_47")]; bool attention_23_attention_1_transpose_x_0 = const()[name = string("attention_23_attention_1_transpose_x_0"), val = bool(true)]; bool attention_23_attention_1_transpose_y_0 = const()[name = string("attention_23_attention_1_transpose_y_0"), val = bool(false)]; tensor attention_23_attention_1 = matmul(transpose_x = attention_23_attention_1_transpose_x_0, transpose_y = attention_23_attention_1_transpose_y_0, x = softmax_47, y = attention_23_slice_value_cache_heads_1)[name = string("attention_23_attention_1")]; int32 attention_23_concat_attention_all_heads_axis_0 = const()[name = string("attention_23_concat_attention_all_heads_axis_0"), val = int32(1)]; bool attention_23_concat_attention_all_heads_interleave_0 = const()[name = string("attention_23_concat_attention_all_heads_interleave_0"), val = bool(false)]; tensor attention_23_concat_attention_all_heads = concat(axis = attention_23_concat_attention_all_heads_axis_0, interleave = attention_23_concat_attention_all_heads_interleave_0, values = (attention_23_attention_0, attention_23_attention_1))[name = string("attention_23_concat_attention_all_heads")]; tensor attention_23_channels_first_retransposed_perm_0 = const()[name = string("attention_23_channels_first_retransposed_perm_0"), val = tensor([0, 1, 3, 2])]; tensor attention_23_reshaped_shape_0 
= const()[name = string("attention_23_reshaped_shape_0"), val = tensor([1, 896, 64])]; tensor attention_23_channels_first_retransposed = transpose(perm = attention_23_channels_first_retransposed_perm_0, x = attention_23_concat_attention_all_heads)[name = string("transpose_1")]; tensor attention_23_reshaped = reshape(shape = attention_23_reshaped_shape_0, x = attention_23_channels_first_retransposed)[name = string("attention_23_reshaped")]; tensor attention_23_outproj_weight_0 = const()[name = string("attention_23_outproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560458240)))]; tensor attention_23_outproj_strides_0 = const()[name = string("attention_23_outproj_strides_0"), val = tensor([1])]; string attention_23_outproj_pad_type_0 = const()[name = string("attention_23_outproj_pad_type_0"), val = string("valid")]; tensor attention_23_outproj_pad_0 = const()[name = string("attention_23_outproj_pad_0"), val = tensor([0, 0])]; tensor attention_23_outproj_dilations_0 = const()[name = string("attention_23_outproj_dilations_0"), val = tensor([1])]; int32 attention_23_outproj_groups_0 = const()[name = string("attention_23_outproj_groups_0"), val = int32(1)]; tensor attention_23_outproj = conv(dilations = attention_23_outproj_dilations_0, groups = attention_23_outproj_groups_0, pad = attention_23_outproj_pad_0, pad_type = attention_23_outproj_pad_type_0, strides = attention_23_outproj_strides_0, weight = attention_23_outproj_weight_0, x = attention_23_reshaped)[name = string("attention_23_outproj")]; tensor block_23_residual_1 = add(x = block_22_residual_2, y = attention_23_outproj)[name = string("block_23_residual_1")]; tensor block_23_ffn_rmsnorm_abs = abs(x = block_23_residual_1)[name = string("block_23_ffn_rmsnorm_abs")]; tensor block_23_ffn_rmsnorm_maxval_axes_0 = const()[name = string("block_23_ffn_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool block_23_ffn_rmsnorm_maxval_keep_dims_0 = const()[name = string("block_23_ffn_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor block_23_ffn_rmsnorm_maxval = reduce_max(axes = block_23_ffn_rmsnorm_maxval_axes_0, keep_dims = block_23_ffn_rmsnorm_maxval_keep_dims_0, x = block_23_ffn_rmsnorm_abs)[name = string("block_23_ffn_rmsnorm_maxval")]; fp16 block_23_ffn_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("block_23_ffn_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 block_23_ffn_rmsnorm_maxval_clipped_beta_0 = const()[name = string("block_23_ffn_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor block_23_ffn_rmsnorm_maxval_clipped = clip(alpha = block_23_ffn_rmsnorm_maxval_clipped_alpha_0, beta = block_23_ffn_rmsnorm_maxval_clipped_beta_0, x = block_23_ffn_rmsnorm_maxval)[name = string("block_23_ffn_rmsnorm_maxval_clipped")]; tensor block_23_ffn_rmsnorm_scaled = real_div(x = block_23_residual_1, y = block_23_ffn_rmsnorm_maxval_clipped)[name = string("block_23_ffn_rmsnorm_scaled")]; tensor block_23_ffn_rmsnorm_squared_sum_axes_0 = const()[name = string("block_23_ffn_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool block_23_ffn_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("block_23_ffn_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor block_23_ffn_rmsnorm_squared_sum = reduce_sum_square(axes = block_23_ffn_rmsnorm_squared_sum_axes_0, keep_dims = block_23_ffn_rmsnorm_squared_sum_keep_dims_0, x = block_23_ffn_rmsnorm_scaled)[name = string("block_23_ffn_rmsnorm_squared_sum")]; fp16 block_23_ffn_rmsnorm_rsqrt_epsilon_0 = const()[name = 
string("block_23_ffn_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor block_23_ffn_rmsnorm_rsqrt = rsqrt(epsilon = block_23_ffn_rmsnorm_rsqrt_epsilon_0, x = block_23_ffn_rmsnorm_squared_sum)[name = string("block_23_ffn_rmsnorm_rsqrt")]; fp16 block_23_ffn_rmsnorm_dim_scaled_y_0 = const()[name = string("block_23_ffn_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor block_23_ffn_rmsnorm_dim_scaled = mul(x = block_23_ffn_rmsnorm_scaled, y = block_23_ffn_rmsnorm_dim_scaled_y_0)[name = string("block_23_ffn_rmsnorm_dim_scaled")]; tensor block_23_ffn_rmsnorm_normalized = mul(x = block_23_ffn_rmsnorm_dim_scaled, y = block_23_ffn_rmsnorm_rsqrt)[name = string("block_23_ffn_rmsnorm_normalized")]; tensor block_23_ffn_rmsnorm_y_0 = const()[name = string("block_23_ffn_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562063936)))]; tensor block_23_ffn_rmsnorm = mul(x = block_23_ffn_rmsnorm_normalized, y = block_23_ffn_rmsnorm_y_0)[name = string("block_23_ffn_rmsnorm")]; tensor block_23_ffn_inproj_weight_0 = const()[name = string("block_23_ffn_inproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562065792)))]; tensor block_23_ffn_inproj_strides_0 = const()[name = string("block_23_ffn_inproj_strides_0"), val = tensor([1])]; string block_23_ffn_inproj_pad_type_0 = const()[name = string("block_23_ffn_inproj_pad_type_0"), val = string("valid")]; tensor block_23_ffn_inproj_pad_0 = const()[name = string("block_23_ffn_inproj_pad_0"), val = tensor([0, 0])]; tensor block_23_ffn_inproj_dilations_0 = const()[name = string("block_23_ffn_inproj_dilations_0"), val = tensor([1])]; int32 block_23_ffn_inproj_groups_0 = const()[name = string("block_23_ffn_inproj_groups_0"), val = int32(1)]; tensor block_23_ffn_inproj = conv(dilations = block_23_ffn_inproj_dilations_0, groups = block_23_ffn_inproj_groups_0, pad = block_23_ffn_inproj_pad_0, pad_type = block_23_ffn_inproj_pad_type_0, strides = block_23_ffn_inproj_strides_0, weight = block_23_ffn_inproj_weight_0, x = block_23_ffn_rmsnorm)[name = string("block_23_ffn_inproj")]; tensor block_23_ffn_g_weight_0 = const()[name = string("block_23_ffn_g_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(570782144)))]; tensor block_23_ffn_g_strides_0 = const()[name = string("block_23_ffn_g_strides_0"), val = tensor([1])]; string block_23_ffn_g_pad_type_0 = const()[name = string("block_23_ffn_g_pad_type_0"), val = string("valid")]; tensor block_23_ffn_g_pad_0 = const()[name = string("block_23_ffn_g_pad_0"), val = tensor([0, 0])]; tensor block_23_ffn_g_dilations_0 = const()[name = string("block_23_ffn_g_dilations_0"), val = tensor([1])]; int32 block_23_ffn_g_groups_0 = const()[name = string("block_23_ffn_g_groups_0"), val = int32(1)]; tensor block_23_ffn_g = conv(dilations = block_23_ffn_g_dilations_0, groups = block_23_ffn_g_groups_0, pad = block_23_ffn_g_pad_0, pad_type = block_23_ffn_g_pad_type_0, strides = block_23_ffn_g_strides_0, weight = block_23_ffn_g_weight_0, x = block_23_ffn_rmsnorm)[name = string("block_23_ffn_g")]; tensor block_23_ffn_g_activation = silu(x = block_23_ffn_g)[name = string("block_23_ffn_g_activation")]; tensor block_23_ffn_x_gated = mul(x = block_23_ffn_inproj, y = block_23_ffn_g_activation)[name = string("block_23_ffn_x_gated")]; tensor block_23_ffn_outproj_weight_0 = const()[name = string("block_23_ffn_outproj_weight_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), 
offset = uint64(579498496)))]; tensor block_23_ffn_outproj_strides_0 = const()[name = string("block_23_ffn_outproj_strides_0"), val = tensor([1])]; string block_23_ffn_outproj_pad_type_0 = const()[name = string("block_23_ffn_outproj_pad_type_0"), val = string("valid")]; tensor block_23_ffn_outproj_pad_0 = const()[name = string("block_23_ffn_outproj_pad_0"), val = tensor([0, 0])]; tensor block_23_ffn_outproj_dilations_0 = const()[name = string("block_23_ffn_outproj_dilations_0"), val = tensor([1])]; int32 block_23_ffn_outproj_groups_0 = const()[name = string("block_23_ffn_outproj_groups_0"), val = int32(1)]; tensor block_23_ffn_outproj = conv(dilations = block_23_ffn_outproj_dilations_0, groups = block_23_ffn_outproj_groups_0, pad = block_23_ffn_outproj_pad_0, pad_type = block_23_ffn_outproj_pad_type_0, strides = block_23_ffn_outproj_strides_0, weight = block_23_ffn_outproj_weight_0, x = block_23_ffn_x_gated)[name = string("block_23_ffn_outproj")]; tensor block_23_residual_2 = add(x = block_23_ffn_outproj, y = block_23_residual_1)[name = string("block_23_residual_2")]; tensor final_norm_rmsnorm_abs = abs(x = block_23_residual_2)[name = string("final_norm_rmsnorm_abs")]; tensor final_norm_rmsnorm_maxval_axes_0 = const()[name = string("final_norm_rmsnorm_maxval_axes_0"), val = tensor([1])]; bool final_norm_rmsnorm_maxval_keep_dims_0 = const()[name = string("final_norm_rmsnorm_maxval_keep_dims_0"), val = bool(true)]; tensor final_norm_rmsnorm_maxval = reduce_max(axes = final_norm_rmsnorm_maxval_axes_0, keep_dims = final_norm_rmsnorm_maxval_keep_dims_0, x = final_norm_rmsnorm_abs)[name = string("final_norm_rmsnorm_maxval")]; fp16 final_norm_rmsnorm_maxval_clipped_alpha_0 = const()[name = string("final_norm_rmsnorm_maxval_clipped_alpha_0"), val = fp16(0x1p-14)]; fp16 final_norm_rmsnorm_maxval_clipped_beta_0 = const()[name = string("final_norm_rmsnorm_maxval_clipped_beta_0"), val = fp16(inf)]; tensor final_norm_rmsnorm_maxval_clipped = clip(alpha = final_norm_rmsnorm_maxval_clipped_alpha_0, beta = final_norm_rmsnorm_maxval_clipped_beta_0, x = final_norm_rmsnorm_maxval)[name = string("final_norm_rmsnorm_maxval_clipped")]; tensor final_norm_rmsnorm_scaled = real_div(x = block_23_residual_2, y = final_norm_rmsnorm_maxval_clipped)[name = string("final_norm_rmsnorm_scaled")]; tensor final_norm_rmsnorm_squared_sum_axes_0 = const()[name = string("final_norm_rmsnorm_squared_sum_axes_0"), val = tensor([1])]; bool final_norm_rmsnorm_squared_sum_keep_dims_0 = const()[name = string("final_norm_rmsnorm_squared_sum_keep_dims_0"), val = bool(true)]; tensor final_norm_rmsnorm_squared_sum = reduce_sum_square(axes = final_norm_rmsnorm_squared_sum_axes_0, keep_dims = final_norm_rmsnorm_squared_sum_keep_dims_0, x = final_norm_rmsnorm_scaled)[name = string("final_norm_rmsnorm_squared_sum")]; fp16 final_norm_rmsnorm_rsqrt_epsilon_0 = const()[name = string("final_norm_rmsnorm_rsqrt_epsilon_0"), val = fp16(0x1p-14)]; tensor final_norm_rmsnorm_rsqrt = rsqrt(epsilon = final_norm_rmsnorm_rsqrt_epsilon_0, x = final_norm_rmsnorm_squared_sum)[name = string("final_norm_rmsnorm_rsqrt")]; fp16 final_norm_rmsnorm_dim_scaled_y_0 = const()[name = string("final_norm_rmsnorm_dim_scaled_y_0"), val = fp16(0x1.dfp+4)]; tensor final_norm_rmsnorm_dim_scaled = mul(x = final_norm_rmsnorm_scaled, y = final_norm_rmsnorm_dim_scaled_y_0)[name = string("final_norm_rmsnorm_dim_scaled")]; tensor final_norm_rmsnorm_normalized = mul(x = final_norm_rmsnorm_dim_scaled, y = final_norm_rmsnorm_rsqrt)[name = string("final_norm_rmsnorm_normalized")]; 
tensor final_norm_rmsnorm_y_0 = const()[name = string("final_norm_rmsnorm_y_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588214848)))]; tensor final_norm_rmsnorm = mul(x = final_norm_rmsnorm_normalized, y = final_norm_rmsnorm_y_0)[name = string("final_norm_rmsnorm")]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588216704)))]; tensor logits_0_strides_0 = const()[name = string("logits_0_strides_0"), val = tensor([1])]; string logits_0_pad_type_0 = const()[name = string("logits_0_pad_type_0"), val = string("valid")]; tensor logits_0_pad_0 = const()[name = string("logits_0_pad_0"), val = tensor([0, 0])]; tensor logits_0_dilations_0 = const()[name = string("logits_0_dilations_0"), val = tensor([1])]; int32 logits_0_groups_0 = const()[name = string("logits_0_groups_0"), val = int32(1)]; tensor logits_0 = conv(dilations = logits_0_dilations_0, groups = logits_0_groups_0, pad = logits_0_pad_0, pad_type = logits_0_pad_type_0, strides = logits_0_strides_0, weight = expand_dims_1, x = final_norm_rmsnorm)[name = string("logits_0")]; tensor expand_dims_2 = const()[name = string("expand_dims_2"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617576896)))]; tensor logits_1_strides_0 = const()[name = string("logits_1_strides_0"), val = tensor([1])]; string logits_1_pad_type_0 = const()[name = string("logits_1_pad_type_0"), val = string("valid")]; tensor logits_1_pad_0 = const()[name = string("logits_1_pad_0"), val = tensor([0, 0])]; tensor logits_1_dilations_0 = const()[name = string("logits_1_dilations_0"), val = tensor([1])]; int32 logits_1_groups_0 = const()[name = string("logits_1_groups_0"), val = int32(1)]; tensor logits_1 = conv(dilations = logits_1_dilations_0, groups = logits_1_groups_0, pad = logits_1_pad_0, pad_type = logits_1_pad_type_0, strides = logits_1_strides_0, weight = expand_dims_2, x = final_norm_rmsnorm)[name = string("logits_1")]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(646937088)))]; tensor logits_2_strides_0 = const()[name = string("logits_2_strides_0"), val = tensor([1])]; string logits_2_pad_type_0 = const()[name = string("logits_2_pad_type_0"), val = string("valid")]; tensor logits_2_pad_0 = const()[name = string("logits_2_pad_0"), val = tensor([0, 0])]; tensor logits_2_dilations_0 = const()[name = string("logits_2_dilations_0"), val = tensor([1])]; int32 logits_2_groups_0 = const()[name = string("logits_2_groups_0"), val = int32(1)]; tensor logits_2 = conv(dilations = logits_2_dilations_0, groups = logits_2_groups_0, pad = logits_2_pad_0, pad_type = logits_2_pad_type_0, strides = logits_2_strides_0, weight = expand_dims_3, x = final_norm_rmsnorm)[name = string("logits_2")]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676297280)))]; tensor logits_3_strides_0 = const()[name = string("logits_3_strides_0"), val = tensor([1])]; string logits_3_pad_type_0 = const()[name = string("logits_3_pad_type_0"), val = string("valid")]; tensor logits_3_pad_0 = const()[name = string("logits_3_pad_0"), val = tensor([0, 0])]; tensor logits_3_dilations_0 = const()[name = string("logits_3_dilations_0"), val = tensor([1])]; int32 logits_3_groups_0 = const()[name = string("logits_3_groups_0"), val 
= int32(1)]; tensor logits_3 = conv(dilations = logits_3_dilations_0, groups = logits_3_groups_0, pad = logits_3_pad_0, pad_type = logits_3_pad_type_0, strides = logits_3_strides_0, weight = expand_dims_4, x = final_norm_rmsnorm)[name = string("logits_3")]; tensor expand_dims_5 = const()[name = string("expand_dims_5"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(705657472)))]; tensor logits_4_strides_0 = const()[name = string("logits_4_strides_0"), val = tensor([1])]; string logits_4_pad_type_0 = const()[name = string("logits_4_pad_type_0"), val = string("valid")]; tensor logits_4_pad_0 = const()[name = string("logits_4_pad_0"), val = tensor([0, 0])]; tensor logits_4_dilations_0 = const()[name = string("logits_4_dilations_0"), val = tensor([1])]; int32 logits_4_groups_0 = const()[name = string("logits_4_groups_0"), val = int32(1)]; tensor logits_4 = conv(dilations = logits_4_dilations_0, groups = logits_4_groups_0, pad = logits_4_pad_0, pad_type = logits_4_pad_type_0, strides = logits_4_strides_0, weight = expand_dims_5, x = final_norm_rmsnorm)[name = string("logits_4")]; tensor expand_dims_6 = const()[name = string("expand_dims_6"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(735017664)))]; tensor logits_5_strides_0 = const()[name = string("logits_5_strides_0"), val = tensor([1])]; string logits_5_pad_type_0 = const()[name = string("logits_5_pad_type_0"), val = string("valid")]; tensor logits_5_pad_0 = const()[name = string("logits_5_pad_0"), val = tensor([0, 0])]; tensor logits_5_dilations_0 = const()[name = string("logits_5_dilations_0"), val = tensor([1])]; int32 logits_5_groups_0 = const()[name = string("logits_5_groups_0"), val = int32(1)]; tensor logits_5 = conv(dilations = logits_5_dilations_0, groups = logits_5_groups_0, pad = logits_5_pad_0, pad_type = logits_5_pad_type_0, strides = logits_5_strides_0, weight = expand_dims_6, x = final_norm_rmsnorm)[name = string("logits_5")]; tensor expand_dims_7 = const()[name = string("expand_dims_7"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764377856)))]; tensor logits_6_strides_0 = const()[name = string("logits_6_strides_0"), val = tensor([1])]; string logits_6_pad_type_0 = const()[name = string("logits_6_pad_type_0"), val = string("valid")]; tensor logits_6_pad_0 = const()[name = string("logits_6_pad_0"), val = tensor([0, 0])]; tensor logits_6_dilations_0 = const()[name = string("logits_6_dilations_0"), val = tensor([1])]; int32 logits_6_groups_0 = const()[name = string("logits_6_groups_0"), val = int32(1)]; tensor logits_6 = conv(dilations = logits_6_dilations_0, groups = logits_6_groups_0, pad = logits_6_pad_0, pad_type = logits_6_pad_type_0, strides = logits_6_strides_0, weight = expand_dims_7, x = final_norm_rmsnorm)[name = string("logits_6")]; tensor expand_dims_8 = const()[name = string("expand_dims_8"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(793738048)))]; tensor logits_7_strides_0 = const()[name = string("logits_7_strides_0"), val = tensor([1])]; string logits_7_pad_type_0 = const()[name = string("logits_7_pad_type_0"), val = string("valid")]; tensor logits_7_pad_0 = const()[name = string("logits_7_pad_0"), val = tensor([0, 0])]; tensor logits_7_dilations_0 = const()[name = string("logits_7_dilations_0"), val = tensor([1])]; int32 logits_7_groups_0 = const()[name = string("logits_7_groups_0"), val = int32(1)]; tensor logits_7 = conv(dilations = 
logits_7_dilations_0, groups = logits_7_groups_0, pad = logits_7_pad_0, pad_type = logits_7_pad_type_0, strides = logits_7_strides_0, weight = expand_dims_8, x = final_norm_rmsnorm)[name = string("logits_7")]; tensor expand_dims_9 = const()[name = string("expand_dims_9"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(823098240)))]; tensor logits_8_strides_0 = const()[name = string("logits_8_strides_0"), val = tensor([1])]; string logits_8_pad_type_0 = const()[name = string("logits_8_pad_type_0"), val = string("valid")]; tensor logits_8_pad_0 = const()[name = string("logits_8_pad_0"), val = tensor([0, 0])]; tensor logits_8_dilations_0 = const()[name = string("logits_8_dilations_0"), val = tensor([1])]; int32 logits_8_groups_0 = const()[name = string("logits_8_groups_0"), val = int32(1)]; tensor logits_8 = conv(dilations = logits_8_dilations_0, groups = logits_8_groups_0, pad = logits_8_pad_0, pad_type = logits_8_pad_type_0, strides = logits_8_strides_0, weight = expand_dims_9, x = final_norm_rmsnorm)[name = string("logits_8")]; tensor expand_dims_10 = const()[name = string("expand_dims_10"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852458432)))]; tensor logits_9_strides_0 = const()[name = string("logits_9_strides_0"), val = tensor([1])]; string logits_9_pad_type_0 = const()[name = string("logits_9_pad_type_0"), val = string("valid")]; tensor logits_9_pad_0 = const()[name = string("logits_9_pad_0"), val = tensor([0, 0])]; tensor logits_9_dilations_0 = const()[name = string("logits_9_dilations_0"), val = tensor([1])]; int32 logits_9_groups_0 = const()[name = string("logits_9_groups_0"), val = int32(1)]; tensor logits_9 = conv(dilations = logits_9_dilations_0, groups = logits_9_groups_0, pad = logits_9_pad_0, pad_type = logits_9_pad_type_0, strides = logits_9_strides_0, weight = expand_dims_10, x = final_norm_rmsnorm)[name = string("logits_9")]; int32 _logits_axis_0 = const()[name = string("_logits_axis_0"), val = int32(1)]; bool _logits_interleave_0 = const()[name = string("_logits_interleave_0"), val = bool(false)]; tensor _logits = concat(axis = _logits_axis_0, interleave = _logits_interleave_0, values = (logits_0, logits_1, logits_2, logits_3, logits_4, logits_5, logits_6, logits_7, logits_8, logits_9))[name = string("_logits")]; string logits_dtype_0 = const()[name = string("logits_dtype_0"), val = string("fp32")]; tensor logits = cast(dtype = logits_dtype_0, x = _logits)[name = string("cast_0")]; } -> (logits); }
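The split / mul(-1) / concat sequences feeding attention_23_q_rope and attention_23_k_rope are the rotate-half form of rotary position embedding: each head is combined as x * cos + rotate_half(x) * sin, with query_cos_emb and query_sin_emb being rows of the precomputed tables gathered at the current positions. A minimal numpy sketch of that computation, assuming the rotary (head) dimension comes last; the function names are illustrative and not part of the model:

import numpy as np

def rotate_half(x):
    # split the head dimension into two halves, negate the second half and swap it
    # in front of the first, mirroring the split / mul(-1) / concat ops in the listing
    x1, x2 = np.split(x, 2, axis=-1)
    return np.concatenate([-x2, x1], axis=-1)

def apply_rope(x, cos_emb, sin_emb):
    # mirrors: rope = x * cos + rotate_half(x) * sin, applied to both the q and k heads
    return x * cos_emb + rotate_half(x) * sin_emb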
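attention_23 then updates the shared cache states and attends over them: slice_update writes the new key and value rows for layer 23 into the (24, 2, 512, 64) cache tensors at the slots [query_pos1, end_pos_0), read_state / slice_by_index pull the full 512-slot window for this layer back out, and two matmul / softmax / matmul chains handle the two groups of seven query heads that each share one cached KV head, with scores scaled by 0x1p-3 = 1/sqrt(64) and offset by the additive mask before the softmax over the cache axis. A rough numpy sketch of that pattern with the batch dimension dropped; the names and exact shapes are assumptions, not taken from the program:

import numpy as np

def softmax(x, axis):
    x = x - np.max(x, axis=axis, keepdims=True)
    e = np.exp(x)
    return e / np.sum(e, axis=axis, keepdims=True)

def cached_grouped_attention(layer, q, k_new, v_new, key_cache, value_cache, mask, pos):
    # q: (14, seq, 64) rotated queries; k_new/v_new: (2, seq, 64) rotated keys / new values
    # key_cache/value_cache: (24, 2, 512, 64) states; mask: (512, seq) additive causal mask
    seq = q.shape[1]
    key_cache[layer, :, pos:pos + seq, :] = k_new        # slice_update on the key state
    value_cache[layer, :, pos:pos + seq, :] = v_new      # slice_update on the value state
    k = key_cache[layer]                                  # (2, 512, 64), read back after the write
    v = value_cache[layer]
    outputs = []
    for g, q_group in enumerate(np.split(q, k.shape[0], axis=0)):   # 2 groups of 7 query heads
        for q_head in q_group:                                       # q_head: (seq, 64)
            scores = k[g] @ q_head.T / np.sqrt(64.0) + mask          # (512, seq)
            weights = softmax(scores, axis=0)                        # over the 512 cache slots
            outputs.append(weights.T @ v[g])                         # (seq, 64)
    return np.stack(outputs)                                         # (14, seq, 64)

The per-head outputs are concatenated, transposed back to channels-first and reshaped with attention_23_reshaped_shape_0 = [1, 896, 64] before attention_23_outproj and the residual add in block_23_residual_1.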
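block_23_ffn_rmsnorm and final_norm_rmsnorm (like every *_rmsnorm group earlier in the program) realise RMSNorm in an overflow-safe form: the activations are divided by their per-token maximum magnitude (clipped to at least 0x1p-14) so the fp16 sum of squares cannot overflow, scaled by 0x1.dfp+4 = 29.9375 (sqrt(896) rounded to fp16, consistent with the 896-channel residual stream), multiplied by the reciprocal square root of that sum, and finally multiplied by a learned per-channel gain. A numpy sketch under those assumptions; the function name and the channels-first (channels, seq) layout without a batch axis are illustrative:

import numpy as np

def rmsnorm_max_scaled(x, weight, eps=2.0 ** -14):
    # x: (channels, seq) activations; weight: (channels, 1) learned gain
    maxval = np.clip(np.max(np.abs(x), axis=0, keepdims=True), eps, None)
    scaled = x / maxval                                    # keep the squares in fp16 range
    sumsq = np.sum(scaled * scaled, axis=0, keepdims=True)
    normalized = scaled * np.sqrt(x.shape[0]) / np.sqrt(sumsq + eps)
    return normalized * weight

Up to the epsilon handling this equals x * sqrt(C) / ||x||, i.e. plain RMSNorm; the max rescaling only changes how the statistic is accumulated in fp16.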
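The function ends with a sharded output projection: logits_0 through logits_9 each apply one slice of the output-embedding matrix to final_norm_rmsnorm as a 1-D convolution (the weight blobs advance in equal steps of about 28 MiB), the partial logits are concatenated along the channel axis, and the result is cast to fp32. Splitting the head this way is presumably done to keep each weight and output tensor within per-tensor size limits of the Core ML runtime; the shard shapes below are placeholders and the function name is illustrative:

import numpy as np

def sharded_lm_head(hidden, weight_shards):
    # hidden: (896, seq) channels-first activations after final_norm_rmsnorm
    # weight_shards: list of (vocab_shard, 896) slices of the output embedding, in order
    parts = [w.astype(np.float32) @ hidden.astype(np.float32) for w in weight_shards]
    return np.concatenate(parts, axis=0)   # (total_vocab, seq) fp32 logits, as in _logits -> logits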