program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.6.1"}, {"coremlc-version", "3304.7.1"}})] { func main(tensor attention_mask, tensor input_ids) { tensor var_10 = const()[name = tensor("op_10"), val = tensor(-1)]; tensor var_23 = const()[name = tensor("op_23"), val = tensor(true)]; tensor hidden_states_1_axis_0 = const()[name = tensor("hidden_states_1_axis_0"), val = tensor(0)]; tensor model_model_embed_tokens_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32771200))), name = tensor("model_model_embed_tokens_weight_to_fp16_palettized"), shape = tensor([32003, 2048])]; tensor hidden_states_1_cast_batch_dims_0 = const()[name = tensor("hidden_states_1_cast_batch_dims_0"), val = tensor(0)]; tensor hidden_states_1_cast = gather(axis = hidden_states_1_axis_0, batch_dims = hidden_states_1_cast_batch_dims_0, indices = input_ids, x = model_model_embed_tokens_weight_to_fp16_palettized)[name = tensor("hidden_states_1_cast")]; tensor var_108_axes_0 = const()[name = tensor("op_108_axes_0"), val = tensor([1])]; tensor var_108 = expand_dims(axes = var_108_axes_0, x = attention_mask)[name = tensor("op_108")]; tensor var_109_axes_0 = const()[name = tensor("op_109_axes_0"), val = tensor([2])]; tensor var_109 = expand_dims(axes = var_109_axes_0, x = var_108)[name = tensor("op_109")]; tensor var_112_reps_0 = const()[name = tensor("op_112_reps_0"), val = tensor([1, 1, 128, 1])]; tensor var_112 = tile(reps = var_112_reps_0, x = var_109)[name = tensor("op_112")]; tensor var_16_to_fp16 = const()[name = tensor("op_16_to_fp16"), val = tensor(0x1p+0)]; tensor var_113_to_fp16_dtype_0 = const()[name = tensor("op_113_to_fp16_dtype_0"), val = tensor("fp16")]; tensor cast_2 = cast(dtype = var_113_to_fp16_dtype_0, x = var_112)[name = tensor("cast_2")]; tensor inverted_mask_cast = sub(x = var_16_to_fp16, y = cast_2)[name = tensor("inverted_mask_cast")]; tensor var_115_dtype_0 = const()[name = tensor("op_115_dtype_0"), val = tensor("bool")]; tensor var_11_to_fp16 = const()[name = tensor("op_11_to_fp16"), val = tensor(-inf)]; tensor cast_1 = cast(dtype = var_115_dtype_0, x = inverted_mask_cast)[name = tensor("cast_1")]; tensor var_116_cast = select(a = var_11_to_fp16, b = inverted_mask_cast, cond = cast_1)[name = tensor("op_116_cast")]; tensor op_97_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32771328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32779584))), name = tensor("op_97_to_fp16_palettized"), shape = tensor([1, 1, 128, 128])]; tensor attention_mask_cast = add(x = var_116_cast, y = op_97_to_fp16_palettized)[name = tensor("attention_mask_cast")]; tensor var_13_promoted_to_fp16 = const()[name = tensor("op_13_promoted_to_fp16"), val = tensor(0x1p+1)]; tensor var_125_cast = pow(x = hidden_states_1_cast, y = var_13_promoted_to_fp16)[name = tensor("op_125_cast")]; tensor var_126 = const()[name = tensor("op_126"), val = tensor([-1])]; tensor variance_1_cast = reduce_mean(axes = var_126, keep_dims = var_23, x = var_125_cast)[name = tensor("variance_1_cast")]; tensor var_128_to_fp16 = const()[name = tensor("op_128_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_129_cast = add(x = variance_1_cast, y = var_128_to_fp16)[name = tensor("op_129_cast")]; tensor var_130_epsilon_0_to_fp16 = const()[name = tensor("op_130_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_130_cast = rsqrt(epsilon = var_130_epsilon_0_to_fp16, x = var_129_cast)[name = tensor("op_130_cast")]; tensor hidden_states_5_cast = mul(x = hidden_states_1_cast, y = var_130_cast)[name = tensor("hidden_states_5_cast")]; tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32779712)))]; tensor hidden_states_9_cast = mul(x = model_model_layers_0_input_layernorm_weight_to_fp16, y = hidden_states_5_cast)[name = tensor("hidden_states_9_cast")]; tensor model_model_layers_0_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32783872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34881088))), name = tensor("model_model_layers_0_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_0_bias_0_to_fp16 = const()[name = tensor("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34881216)))]; tensor linear_0_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_9_cast)[name = tensor("linear_0_cast")]; tensor model_model_layers_0_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34885376))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35147584))), name = tensor("model_model_layers_0_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_1_bias_0_to_fp16 = const()[name = tensor("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35147712)))]; tensor linear_1_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_0_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_9_cast)[name = tensor("linear_1_cast")]; tensor model_model_layers_0_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35148288))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35410496))), name = tensor("model_model_layers_0_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_2_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_0_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_9_cast)[name = tensor("linear_2_cast")]; tensor var_150 = const()[name = tensor("op_150"), val = tensor([1, 128, 32, 64])]; tensor var_151_cast = reshape(shape = var_150, x = linear_0_cast)[name = tensor("op_151_cast")]; tensor q_1_perm_0 = const()[name = tensor("q_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_153 = const()[name = tensor("op_153"), val = tensor([1, 128, 4, 64])]; tensor var_154_cast = reshape(shape = var_153, x = linear_1_cast)[name = tensor("op_154_cast")]; tensor key_states_3_perm_0 = const()[name = tensor("key_states_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_156 = const()[name = tensor("op_156"), val = tensor([1, 128, 4, 64])]; tensor var_157_cast = reshape(shape = var_156, x = linear_2_cast)[name = tensor("op_157_cast")]; tensor hidden_states_15_perm_0 = const()[name = tensor("hidden_states_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor cos_1_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35410624))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35414784))), name = tensor("cos_1_to_fp16_palettized"), shape = tensor([1, 1, 128, 64])]; tensor transpose_109 = transpose(perm = q_1_perm_0, x = var_151_cast)[name = tensor("transpose_109")]; tensor var_183_cast = mul(x = transpose_109, y = cos_1_to_fp16_palettized)[name = tensor("op_183_cast")]; tensor x1_1_begin_0 = const()[name = tensor("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = tensor("x1_1_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_1_end_mask_0 = const()[name = tensor("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1_cast = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = transpose_109)[name = tensor("x1_1_cast")]; tensor x2_1_begin_0 = const()[name = tensor("x2_1_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_1_end_0 = const()[name = tensor("x2_1_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_1_end_mask_0 = const()[name = tensor("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1_cast = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = transpose_109)[name = tensor("x2_1_cast")]; tensor const_11_promoted_to_fp16 = const()[name = tensor("const_11_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_194_cast = mul(x = x2_1_cast, y = const_11_promoted_to_fp16)[name = tensor("op_194_cast")]; tensor var_196_interleave_0 = const()[name = tensor("op_196_interleave_0"), val = tensor(false)]; tensor var_196_cast = concat(axis = var_10, interleave = var_196_interleave_0, values = (var_194_cast, x1_1_cast))[name = tensor("op_196_cast")]; tensor sin_1_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35414912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35419072))), name = tensor("sin_1_to_fp16_palettized"), shape = tensor([1, 1, 128, 64])]; tensor var_197_cast = mul(x = var_196_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_197_cast")]; tensor query_states_3_cast = add(x = var_183_cast, y = var_197_cast)[name = tensor("query_states_3_cast")]; tensor transpose_108 = transpose(perm = key_states_3_perm_0, x = var_154_cast)[name = tensor("transpose_108")]; tensor var_199_cast = mul(x = transpose_108, y = cos_1_to_fp16_palettized)[name = tensor("op_199_cast")]; tensor x1_3_begin_0 = const()[name = tensor("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = tensor("x1_3_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_3_end_mask_0 = const()[name = tensor("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3_cast = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = transpose_108)[name = tensor("x1_3_cast")]; tensor x2_3_begin_0 = const()[name = tensor("x2_3_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_3_end_0 = const()[name = tensor("x2_3_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_3_end_mask_0 = const()[name = tensor("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3_cast = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = transpose_108)[name = tensor("x2_3_cast")]; tensor const_14_promoted_to_fp16 = const()[name = tensor("const_14_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_210_cast = mul(x = x2_3_cast, y = const_14_promoted_to_fp16)[name = tensor("op_210_cast")]; tensor var_212_interleave_0 = const()[name = tensor("op_212_interleave_0"), val = tensor(false)]; tensor var_212_cast = concat(axis = var_10, interleave = var_212_interleave_0, values = (var_210_cast, x1_3_cast))[name = tensor("op_212_cast")]; tensor var_213_cast = mul(x = var_212_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_213_cast")]; tensor hidden_states_11_cast = add(x = var_199_cast, y = var_213_cast)[name = tensor("hidden_states_11_cast")]; tensor var_222_axes_0 = const()[name = tensor("op_222_axes_0"), val = tensor([2])]; tensor var_222_cast = expand_dims(axes = var_222_axes_0, x = hidden_states_11_cast)[name = tensor("op_222_cast")]; tensor hidden_states_13_reps_0 = const()[name = tensor("hidden_states_13_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_13_cast = tile(reps = hidden_states_13_reps_0, x = var_222_cast)[name = tensor("hidden_states_13_cast")]; tensor var_230 = const()[name = tensor("op_230"), val = tensor([1, 32, 128, 64])]; tensor key_states_5_cast = reshape(shape = var_230, x = hidden_states_13_cast)[name = tensor("key_states_5_cast")]; tensor var_239_axes_0 = const()[name = tensor("op_239_axes_0"), val = tensor([2])]; tensor transpose_107 = transpose(perm = hidden_states_15_perm_0, x = var_157_cast)[name = tensor("transpose_107")]; tensor var_239_cast = expand_dims(axes = var_239_axes_0, x = transpose_107)[name = tensor("op_239_cast")]; tensor hidden_states_17_reps_0 = const()[name = tensor("hidden_states_17_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_17_cast = tile(reps = hidden_states_17_reps_0, x = var_239_cast)[name = tensor("hidden_states_17_cast")]; tensor var_247 = const()[name = tensor("op_247"), val = tensor([1, 32, 128, 64])]; tensor value_states_3_cast = reshape(shape = var_247, x = hidden_states_17_cast)[name = tensor("value_states_3_cast")]; tensor var_249_perm_0 = const()[name = tensor("op_249_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_250_transpose_x_0 = const()[name = tensor("op_250_transpose_x_0"), val = tensor(false)]; tensor var_250_transpose_y_0 = const()[name = tensor("op_250_transpose_y_0"), val = tensor(false)]; tensor transpose_106 = transpose(perm = var_249_perm_0, x = key_states_5_cast)[name = tensor("transpose_106")]; tensor var_250_cast = matmul(transpose_x = var_250_transpose_x_0, transpose_y = var_250_transpose_y_0, x = query_states_3_cast, y = transpose_106)[name = tensor("op_250_cast")]; tensor _inversed_attn_weights_1_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_1_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_1_cast = mul(x = var_250_cast, y = _inversed_attn_weights_1_y_0_to_fp16)[name = tensor("_inversed_attn_weights_1_cast")]; tensor input_1_cast = add(x = _inversed_attn_weights_1_cast, y = attention_mask_cast)[name = tensor("input_1_cast")]; tensor var_254_cast = softmax(axis = var_10, x = input_1_cast)[name = tensor("op_254_cast")]; tensor attn_output_1_transpose_x_0 = const()[name = tensor("attn_output_1_transpose_x_0"), val = tensor(false)]; tensor attn_output_1_transpose_y_0 = const()[name = tensor("attn_output_1_transpose_y_0"), val = tensor(false)]; tensor attn_output_1_cast = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = var_254_cast, y = value_states_3_cast)[name = tensor("attn_output_1_cast")]; tensor var_257_perm_0 = const()[name = tensor("op_257_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_259 = const()[name = tensor("op_259"), val = tensor([1, 128, 2048])]; tensor transpose_105 = transpose(perm = var_257_perm_0, x = attn_output_1_cast)[name = tensor("transpose_105")]; tensor input_3_cast = reshape(shape = var_259, x = transpose_105)[name = tensor("input_3_cast")]; tensor model_model_layers_0_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35419200))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37516416))), name = tensor("model_model_layers_0_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_3_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_to_fp16_palettized, x = input_3_cast)[name = tensor("linear_3_cast")]; tensor hidden_states_21_cast = add(x = hidden_states_1_cast, y = linear_3_cast)[name = tensor("hidden_states_21_cast")]; tensor var_13_promoted_to_fp16_1 = const()[name = tensor("op_13_promoted_to_fp16_1"), val = tensor(0x1p+1)]; tensor var_266_cast = pow(x = hidden_states_21_cast, y = var_13_promoted_to_fp16_1)[name = tensor("op_266_cast")]; tensor var_267 = const()[name = tensor("op_267"), val = tensor([-1])]; tensor variance_3_cast = reduce_mean(axes = var_267, keep_dims = var_23, x = var_266_cast)[name = tensor("variance_3_cast")]; tensor var_269_to_fp16 = const()[name = tensor("op_269_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_270_cast = add(x = variance_3_cast, y = var_269_to_fp16)[name = tensor("op_270_cast")]; tensor var_271_epsilon_0_to_fp16 = const()[name = tensor("op_271_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_271_cast = rsqrt(epsilon = var_271_epsilon_0_to_fp16, x = var_270_cast)[name = tensor("op_271_cast")]; tensor hidden_states_25_cast = mul(x = hidden_states_21_cast, y = var_271_cast)[name = tensor("hidden_states_25_cast")]; tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37516544)))]; tensor input_5_cast = mul(x = model_model_layers_0_post_attention_layernorm_weight_to_fp16, y = hidden_states_25_cast)[name = tensor("input_5_cast")]; tensor model_model_layers_0_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37520704))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43287936))), name = tensor("model_model_layers_0_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_4_bias_0_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43288064))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43290944))), name = tensor("linear_4_bias_0_to_fp16_palettized"), shape = tensor([5632])]; tensor linear_4_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_0_mlp_gate_proj_weight_to_fp16_palettized, x = input_5_cast)[name = tensor("linear_4_cast")]; tensor var_283_cast = silu(x = linear_4_cast)[name = tensor("op_283_cast")]; tensor model_model_layers_0_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43291072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49058304))), name = tensor("model_model_layers_0_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_5_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_0_mlp_up_proj_weight_to_fp16_palettized, x = input_5_cast)[name = tensor("linear_5_cast")]; tensor input_9_cast = mul(x = var_283_cast, y = linear_5_cast)[name = tensor("input_9_cast")]; tensor model_model_layers_0_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49058432))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54825664))), name = tensor("model_model_layers_0_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_6_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_mlp_down_proj_weight_to_fp16_palettized, x = input_9_cast)[name = tensor("linear_6_cast")]; tensor hidden_states_31_cast = add(x = hidden_states_21_cast, y = linear_6_cast)[name = tensor("hidden_states_31_cast")]; tensor var_13_promoted_to_fp16_2 = const()[name = tensor("op_13_promoted_to_fp16_2"), val = tensor(0x1p+1)]; tensor var_296_cast = pow(x = hidden_states_31_cast, y = var_13_promoted_to_fp16_2)[name = tensor("op_296_cast")]; tensor var_297 = const()[name = tensor("op_297"), val = tensor([-1])]; tensor variance_5_cast = reduce_mean(axes = var_297, keep_dims = var_23, x = var_296_cast)[name = tensor("variance_5_cast")]; tensor var_299_to_fp16 = const()[name = tensor("op_299_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_300_cast = add(x = variance_5_cast, y = var_299_to_fp16)[name = tensor("op_300_cast")]; tensor var_301_epsilon_0_to_fp16 = const()[name = tensor("op_301_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_301_cast = rsqrt(epsilon = var_301_epsilon_0_to_fp16, x = var_300_cast)[name = tensor("op_301_cast")]; tensor hidden_states_35_cast = mul(x = hidden_states_31_cast, y = var_301_cast)[name = tensor("hidden_states_35_cast")]; tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54825792)))]; tensor hidden_states_39_cast = mul(x = model_model_layers_1_input_layernorm_weight_to_fp16, y = hidden_states_35_cast)[name = tensor("hidden_states_39_cast")]; tensor model_model_layers_1_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(54829952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56927168))), name = tensor("model_model_layers_1_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_7_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_39_cast)[name = tensor("linear_7_cast")]; tensor model_model_layers_1_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56927296))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57189504))), name = tensor("model_model_layers_1_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_8_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_1_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_39_cast)[name = tensor("linear_8_cast")]; tensor model_model_layers_1_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57189632))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57451840))), name = tensor("model_model_layers_1_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_9_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_1_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_39_cast)[name = tensor("linear_9_cast")]; tensor var_321 = const()[name = tensor("op_321"), val = tensor([1, 128, 32, 64])]; tensor var_322_cast = reshape(shape = var_321, x = linear_7_cast)[name = tensor("op_322_cast")]; tensor q_3_perm_0 = const()[name = tensor("q_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_324 = const()[name = tensor("op_324"), val = tensor([1, 128, 4, 64])]; tensor var_325_cast = reshape(shape = var_324, x = linear_8_cast)[name = tensor("op_325_cast")]; tensor key_states_9_perm_0 = const()[name = tensor("key_states_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_327 = const()[name = tensor("op_327"), val = tensor([1, 128, 4, 64])]; tensor var_328_cast = reshape(shape = var_327, x = linear_9_cast)[name = tensor("op_328_cast")]; tensor hidden_states_45_perm_0 = const()[name = tensor("hidden_states_45_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_104 = transpose(perm = q_3_perm_0, x = var_322_cast)[name = tensor("transpose_104")]; tensor var_354_cast = mul(x = transpose_104, y = cos_1_to_fp16_palettized)[name = tensor("op_354_cast")]; tensor x1_5_begin_0 = const()[name = tensor("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = tensor("x1_5_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_5_end_mask_0 = const()[name = tensor("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5_cast = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = transpose_104)[name = tensor("x1_5_cast")]; tensor x2_5_begin_0 = const()[name = tensor("x2_5_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_5_end_0 = const()[name = tensor("x2_5_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_5_end_mask_0 = const()[name = tensor("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5_cast = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = transpose_104)[name = tensor("x2_5_cast")]; tensor const_28_promoted_to_fp16 = const()[name = tensor("const_28_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_365_cast = mul(x = x2_5_cast, y = const_28_promoted_to_fp16)[name = tensor("op_365_cast")]; tensor var_367_interleave_0 = const()[name = tensor("op_367_interleave_0"), val = tensor(false)]; tensor var_367_cast = concat(axis = var_10, interleave = var_367_interleave_0, values = (var_365_cast, x1_5_cast))[name = tensor("op_367_cast")]; tensor var_368_cast = mul(x = var_367_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_368_cast")]; tensor query_states_7_cast = add(x = var_354_cast, y = var_368_cast)[name = tensor("query_states_7_cast")]; tensor transpose_103 = transpose(perm = key_states_9_perm_0, x = var_325_cast)[name = tensor("transpose_103")]; tensor var_370_cast = mul(x = transpose_103, y = cos_1_to_fp16_palettized)[name = tensor("op_370_cast")]; tensor x1_7_begin_0 = const()[name = tensor("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = tensor("x1_7_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_7_end_mask_0 = const()[name = tensor("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7_cast = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = transpose_103)[name = tensor("x1_7_cast")]; tensor x2_7_begin_0 = const()[name = tensor("x2_7_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_7_end_0 = const()[name = tensor("x2_7_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_7_end_mask_0 = const()[name = tensor("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7_cast = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = transpose_103)[name = tensor("x2_7_cast")]; tensor const_31_promoted_to_fp16 = const()[name = tensor("const_31_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_381_cast = mul(x = x2_7_cast, y = const_31_promoted_to_fp16)[name = tensor("op_381_cast")]; tensor var_383_interleave_0 = const()[name = tensor("op_383_interleave_0"), val = tensor(false)]; tensor var_383_cast = concat(axis = var_10, interleave = var_383_interleave_0, values = (var_381_cast, x1_7_cast))[name = tensor("op_383_cast")]; tensor var_384_cast = mul(x = var_383_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_384_cast")]; tensor hidden_states_41_cast = add(x = var_370_cast, y = var_384_cast)[name = tensor("hidden_states_41_cast")]; tensor var_393_axes_0 = const()[name = tensor("op_393_axes_0"), val = tensor([2])]; tensor var_393_cast = expand_dims(axes = var_393_axes_0, x = hidden_states_41_cast)[name = tensor("op_393_cast")]; tensor hidden_states_43_reps_0 = const()[name = tensor("hidden_states_43_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_43_cast = tile(reps = hidden_states_43_reps_0, x = var_393_cast)[name = tensor("hidden_states_43_cast")]; tensor var_401 = const()[name = tensor("op_401"), val = tensor([1, 32, 128, 64])]; tensor key_states_11_cast = reshape(shape = var_401, x = hidden_states_43_cast)[name = tensor("key_states_11_cast")]; tensor var_410_axes_0 = const()[name = tensor("op_410_axes_0"), val = tensor([2])]; tensor transpose_102 = transpose(perm = hidden_states_45_perm_0, x = var_328_cast)[name = tensor("transpose_102")]; tensor var_410_cast = expand_dims(axes = var_410_axes_0, x = transpose_102)[name = tensor("op_410_cast")]; tensor hidden_states_47_reps_0 = const()[name = tensor("hidden_states_47_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_47_cast = tile(reps = hidden_states_47_reps_0, x = var_410_cast)[name = tensor("hidden_states_47_cast")]; tensor var_418 = const()[name = tensor("op_418"), val = tensor([1, 32, 128, 64])]; tensor value_states_7_cast = reshape(shape = var_418, x = hidden_states_47_cast)[name = tensor("value_states_7_cast")]; tensor var_420_perm_0 = const()[name = tensor("op_420_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_421_transpose_x_0 = const()[name = tensor("op_421_transpose_x_0"), val = tensor(false)]; tensor var_421_transpose_y_0 = const()[name = tensor("op_421_transpose_y_0"), val = tensor(false)]; tensor transpose_101 = transpose(perm = var_420_perm_0, x = key_states_11_cast)[name = tensor("transpose_101")]; tensor var_421_cast = matmul(transpose_x = var_421_transpose_x_0, transpose_y = var_421_transpose_y_0, x = query_states_7_cast, y = transpose_101)[name = tensor("op_421_cast")]; tensor _inversed_attn_weights_5_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_5_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_5_cast = mul(x = var_421_cast, y = _inversed_attn_weights_5_y_0_to_fp16)[name = tensor("_inversed_attn_weights_5_cast")]; tensor input_11_cast = add(x = _inversed_attn_weights_5_cast, y = attention_mask_cast)[name = tensor("input_11_cast")]; tensor var_425_cast = softmax(axis = var_10, x = input_11_cast)[name = tensor("op_425_cast")]; tensor attn_output_5_transpose_x_0 = const()[name = tensor("attn_output_5_transpose_x_0"), val = tensor(false)]; tensor attn_output_5_transpose_y_0 = const()[name = tensor("attn_output_5_transpose_y_0"), val = tensor(false)]; tensor attn_output_5_cast = matmul(transpose_x = attn_output_5_transpose_x_0, transpose_y = attn_output_5_transpose_y_0, x = var_425_cast, y = value_states_7_cast)[name = tensor("attn_output_5_cast")]; tensor var_428_perm_0 = const()[name = tensor("op_428_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_430 = const()[name = tensor("op_430"), val = tensor([1, 128, 2048])]; tensor transpose_100 = transpose(perm = var_428_perm_0, x = attn_output_5_cast)[name = tensor("transpose_100")]; tensor input_13_cast = reshape(shape = var_430, x = transpose_100)[name = tensor("input_13_cast")]; tensor model_model_layers_1_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57451968))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59549184))), name = tensor("model_model_layers_1_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_10_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_to_fp16_palettized, x = input_13_cast)[name = tensor("linear_10_cast")]; tensor hidden_states_51_cast = add(x = hidden_states_31_cast, y = linear_10_cast)[name = tensor("hidden_states_51_cast")]; tensor var_13_promoted_to_fp16_3 = const()[name = tensor("op_13_promoted_to_fp16_3"), val = tensor(0x1p+1)]; tensor var_437_cast = pow(x = hidden_states_51_cast, y = var_13_promoted_to_fp16_3)[name = tensor("op_437_cast")]; tensor var_438 = const()[name = tensor("op_438"), val = tensor([-1])]; tensor variance_7_cast = reduce_mean(axes = var_438, keep_dims = var_23, x = var_437_cast)[name = tensor("variance_7_cast")]; tensor var_440_to_fp16 = const()[name = tensor("op_440_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_441_cast = add(x = variance_7_cast, y = var_440_to_fp16)[name = tensor("op_441_cast")]; tensor var_442_epsilon_0_to_fp16 = const()[name = tensor("op_442_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_442_cast = rsqrt(epsilon = var_442_epsilon_0_to_fp16, x = var_441_cast)[name = tensor("op_442_cast")]; tensor hidden_states_55_cast = mul(x = hidden_states_51_cast, y = var_442_cast)[name = tensor("hidden_states_55_cast")]; tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59549312)))]; tensor input_15_cast = mul(x = model_model_layers_1_post_attention_layernorm_weight_to_fp16, y = hidden_states_55_cast)[name = tensor("input_15_cast")]; tensor model_model_layers_1_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59553472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65320704))), name = tensor("model_model_layers_1_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_11_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_1_mlp_gate_proj_weight_to_fp16_palettized, x = input_15_cast)[name = tensor("linear_11_cast")]; tensor var_454_cast = silu(x = linear_11_cast)[name = tensor("op_454_cast")]; tensor model_model_layers_1_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65320832))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(71088064))), name = tensor("model_model_layers_1_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_12_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_1_mlp_up_proj_weight_to_fp16_palettized, x = input_15_cast)[name = tensor("linear_12_cast")]; tensor input_19_cast = mul(x = var_454_cast, y = linear_12_cast)[name = tensor("input_19_cast")]; tensor model_model_layers_1_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(71088192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76855424))), name = tensor("model_model_layers_1_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_13_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_mlp_down_proj_weight_to_fp16_palettized, x = input_19_cast)[name = tensor("linear_13_cast")]; tensor hidden_states_61_cast = add(x = hidden_states_51_cast, y = linear_13_cast)[name = tensor("hidden_states_61_cast")]; tensor var_13_promoted_to_fp16_4 = const()[name = tensor("op_13_promoted_to_fp16_4"), val = tensor(0x1p+1)]; tensor var_467_cast = pow(x = hidden_states_61_cast, y = var_13_promoted_to_fp16_4)[name = tensor("op_467_cast")]; tensor var_468 = const()[name = tensor("op_468"), val = tensor([-1])]; tensor variance_9_cast = reduce_mean(axes = var_468, keep_dims = var_23, x = var_467_cast)[name = tensor("variance_9_cast")]; tensor var_470_to_fp16 = const()[name = tensor("op_470_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_471_cast = add(x = variance_9_cast, y = var_470_to_fp16)[name = tensor("op_471_cast")]; tensor var_472_epsilon_0_to_fp16 = const()[name = tensor("op_472_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_472_cast = rsqrt(epsilon = var_472_epsilon_0_to_fp16, x = var_471_cast)[name = tensor("op_472_cast")]; tensor hidden_states_65_cast = mul(x = hidden_states_61_cast, y = var_472_cast)[name = tensor("hidden_states_65_cast")]; tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76855552)))]; tensor hidden_states_69_cast = mul(x = model_model_layers_2_input_layernorm_weight_to_fp16, y = hidden_states_65_cast)[name = tensor("hidden_states_69_cast")]; tensor model_model_layers_2_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76859712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78956928))), name = tensor("model_model_layers_2_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_14_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_69_cast)[name = tensor("linear_14_cast")]; tensor model_model_layers_2_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78957056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79219264))), name = tensor("model_model_layers_2_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_15_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_2_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_69_cast)[name = tensor("linear_15_cast")]; tensor model_model_layers_2_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79219392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79481600))), name = tensor("model_model_layers_2_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_16_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_2_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_69_cast)[name = tensor("linear_16_cast")]; tensor var_492 = const()[name = tensor("op_492"), val = tensor([1, 128, 32, 64])]; tensor var_493_cast = reshape(shape = var_492, x = linear_14_cast)[name = tensor("op_493_cast")]; tensor q_5_perm_0 = const()[name = tensor("q_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_495 = const()[name = tensor("op_495"), val = tensor([1, 128, 4, 64])]; tensor var_496_cast = reshape(shape = var_495, x = linear_15_cast)[name = tensor("op_496_cast")]; tensor key_states_15_perm_0 = const()[name = tensor("key_states_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_498 = const()[name = tensor("op_498"), val = tensor([1, 128, 4, 64])]; tensor var_499_cast = reshape(shape = var_498, x = linear_16_cast)[name = tensor("op_499_cast")]; tensor hidden_states_75_perm_0 = const()[name = tensor("hidden_states_75_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_99 = transpose(perm = q_5_perm_0, x = var_493_cast)[name = tensor("transpose_99")]; tensor var_525_cast = mul(x = transpose_99, y = cos_1_to_fp16_palettized)[name = tensor("op_525_cast")]; tensor x1_9_begin_0 = const()[name = tensor("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = tensor("x1_9_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_9_end_mask_0 = const()[name = tensor("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9_cast = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = transpose_99)[name = tensor("x1_9_cast")]; tensor x2_9_begin_0 = const()[name = tensor("x2_9_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_9_end_0 = const()[name = tensor("x2_9_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_9_end_mask_0 = const()[name = tensor("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9_cast = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = transpose_99)[name = tensor("x2_9_cast")]; tensor const_45_promoted_to_fp16 = const()[name = tensor("const_45_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_536_cast = mul(x = x2_9_cast, y = const_45_promoted_to_fp16)[name = tensor("op_536_cast")]; tensor var_538_interleave_0 = const()[name = tensor("op_538_interleave_0"), val = tensor(false)]; tensor var_538_cast = concat(axis = var_10, interleave = var_538_interleave_0, values = (var_536_cast, x1_9_cast))[name = tensor("op_538_cast")]; tensor var_539_cast = mul(x = var_538_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_539_cast")]; tensor query_states_11_cast = add(x = var_525_cast, y = var_539_cast)[name = tensor("query_states_11_cast")]; tensor transpose_98 = transpose(perm = key_states_15_perm_0, x = var_496_cast)[name = tensor("transpose_98")]; tensor var_541_cast = mul(x = transpose_98, y = cos_1_to_fp16_palettized)[name = tensor("op_541_cast")]; tensor x1_11_begin_0 = const()[name = tensor("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = tensor("x1_11_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_11_end_mask_0 = const()[name = tensor("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11_cast = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = transpose_98)[name = tensor("x1_11_cast")]; tensor x2_11_begin_0 = const()[name = tensor("x2_11_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_11_end_0 = const()[name = tensor("x2_11_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_11_end_mask_0 = const()[name = tensor("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11_cast = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = transpose_98)[name = tensor("x2_11_cast")]; tensor const_48_promoted_to_fp16 = const()[name = tensor("const_48_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_552_cast = mul(x = x2_11_cast, y = const_48_promoted_to_fp16)[name = tensor("op_552_cast")]; tensor var_554_interleave_0 = const()[name = tensor("op_554_interleave_0"), val = tensor(false)]; tensor var_554_cast = concat(axis = var_10, interleave = var_554_interleave_0, values = (var_552_cast, x1_11_cast))[name = tensor("op_554_cast")]; tensor var_555_cast = mul(x = var_554_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_555_cast")]; tensor hidden_states_71_cast = add(x = var_541_cast, y = var_555_cast)[name = tensor("hidden_states_71_cast")]; tensor var_564_axes_0 = const()[name = tensor("op_564_axes_0"), val = tensor([2])]; tensor var_564_cast = expand_dims(axes = var_564_axes_0, x = hidden_states_71_cast)[name = tensor("op_564_cast")]; tensor hidden_states_73_reps_0 = const()[name = tensor("hidden_states_73_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_73_cast = tile(reps = hidden_states_73_reps_0, x = var_564_cast)[name = tensor("hidden_states_73_cast")]; tensor var_572 = const()[name = tensor("op_572"), val = tensor([1, 32, 128, 64])]; tensor key_states_17_cast = reshape(shape = var_572, x = hidden_states_73_cast)[name = tensor("key_states_17_cast")]; tensor var_581_axes_0 = const()[name = tensor("op_581_axes_0"), val = tensor([2])]; tensor transpose_97 = transpose(perm = hidden_states_75_perm_0, x = var_499_cast)[name = tensor("transpose_97")]; tensor var_581_cast = expand_dims(axes = var_581_axes_0, x = transpose_97)[name = tensor("op_581_cast")]; tensor hidden_states_77_reps_0 = const()[name = tensor("hidden_states_77_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_77_cast = tile(reps = hidden_states_77_reps_0, x = var_581_cast)[name = tensor("hidden_states_77_cast")]; tensor var_589 = const()[name = tensor("op_589"), val = tensor([1, 32, 128, 64])]; tensor value_states_11_cast = reshape(shape = var_589, x = hidden_states_77_cast)[name = tensor("value_states_11_cast")]; tensor var_591_perm_0 = const()[name = tensor("op_591_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_592_transpose_x_0 = const()[name = tensor("op_592_transpose_x_0"), val = tensor(false)]; tensor var_592_transpose_y_0 = const()[name = tensor("op_592_transpose_y_0"), val = tensor(false)]; tensor transpose_96 = transpose(perm = var_591_perm_0, x = key_states_17_cast)[name = tensor("transpose_96")]; tensor var_592_cast = matmul(transpose_x = var_592_transpose_x_0, transpose_y = var_592_transpose_y_0, x = query_states_11_cast, y = transpose_96)[name = tensor("op_592_cast")]; tensor _inversed_attn_weights_9_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_9_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_9_cast = mul(x = var_592_cast, y = _inversed_attn_weights_9_y_0_to_fp16)[name = tensor("_inversed_attn_weights_9_cast")]; tensor input_21_cast = add(x = _inversed_attn_weights_9_cast, y = attention_mask_cast)[name = tensor("input_21_cast")]; tensor var_596_cast = softmax(axis = var_10, x = input_21_cast)[name = tensor("op_596_cast")]; tensor attn_output_9_transpose_x_0 = const()[name = tensor("attn_output_9_transpose_x_0"), val = tensor(false)]; tensor attn_output_9_transpose_y_0 = const()[name = tensor("attn_output_9_transpose_y_0"), val = tensor(false)]; tensor attn_output_9_cast = matmul(transpose_x = attn_output_9_transpose_x_0, transpose_y = attn_output_9_transpose_y_0, x = var_596_cast, y = value_states_11_cast)[name = tensor("attn_output_9_cast")]; tensor var_599_perm_0 = const()[name = tensor("op_599_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_601 = const()[name = tensor("op_601"), val = tensor([1, 128, 2048])]; tensor transpose_95 = transpose(perm = var_599_perm_0, x = attn_output_9_cast)[name = tensor("transpose_95")]; tensor input_23_cast = reshape(shape = var_601, x = transpose_95)[name = tensor("input_23_cast")]; tensor model_model_layers_2_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79481728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81578944))), name = tensor("model_model_layers_2_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_17_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_to_fp16_palettized, x = input_23_cast)[name = tensor("linear_17_cast")]; tensor hidden_states_81_cast = add(x = hidden_states_61_cast, y = linear_17_cast)[name = tensor("hidden_states_81_cast")]; tensor var_13_promoted_to_fp16_5 = const()[name = tensor("op_13_promoted_to_fp16_5"), val = tensor(0x1p+1)]; tensor var_608_cast = pow(x = hidden_states_81_cast, y = var_13_promoted_to_fp16_5)[name = tensor("op_608_cast")]; tensor var_609 = const()[name = tensor("op_609"), val = tensor([-1])]; tensor variance_11_cast = reduce_mean(axes = var_609, keep_dims = var_23, x = var_608_cast)[name = tensor("variance_11_cast")]; tensor var_611_to_fp16 = const()[name = tensor("op_611_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_612_cast = add(x = variance_11_cast, y = var_611_to_fp16)[name = tensor("op_612_cast")]; tensor var_613_epsilon_0_to_fp16 = const()[name = tensor("op_613_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_613_cast = rsqrt(epsilon = var_613_epsilon_0_to_fp16, x = var_612_cast)[name = tensor("op_613_cast")]; tensor hidden_states_85_cast = mul(x = hidden_states_81_cast, y = var_613_cast)[name = tensor("hidden_states_85_cast")]; tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81579072)))]; tensor input_25_cast = mul(x = model_model_layers_2_post_attention_layernorm_weight_to_fp16, y = hidden_states_85_cast)[name = tensor("input_25_cast")]; tensor model_model_layers_2_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81583232))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87350464))), name = tensor("model_model_layers_2_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_18_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_2_mlp_gate_proj_weight_to_fp16_palettized, x = input_25_cast)[name = tensor("linear_18_cast")]; tensor var_625_cast = silu(x = linear_18_cast)[name = tensor("op_625_cast")]; tensor model_model_layers_2_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87350592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93117824))), name = tensor("model_model_layers_2_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_19_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_2_mlp_up_proj_weight_to_fp16_palettized, x = input_25_cast)[name = tensor("linear_19_cast")]; tensor input_29_cast = mul(x = var_625_cast, y = linear_19_cast)[name = tensor("input_29_cast")]; tensor model_model_layers_2_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93117952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98885184))), name = tensor("model_model_layers_2_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_20_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_mlp_down_proj_weight_to_fp16_palettized, x = input_29_cast)[name = tensor("linear_20_cast")]; tensor hidden_states_91_cast = add(x = hidden_states_81_cast, y = linear_20_cast)[name = tensor("hidden_states_91_cast")]; tensor var_13_promoted_to_fp16_6 = const()[name = tensor("op_13_promoted_to_fp16_6"), val = tensor(0x1p+1)]; tensor var_638_cast = pow(x = hidden_states_91_cast, y = var_13_promoted_to_fp16_6)[name = tensor("op_638_cast")]; tensor var_639 = const()[name = tensor("op_639"), val = tensor([-1])]; tensor variance_13_cast = reduce_mean(axes = var_639, keep_dims = var_23, x = var_638_cast)[name = tensor("variance_13_cast")]; tensor var_641_to_fp16 = const()[name = tensor("op_641_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_642_cast = add(x = variance_13_cast, y = var_641_to_fp16)[name = tensor("op_642_cast")]; tensor var_643_epsilon_0_to_fp16 = const()[name = tensor("op_643_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_643_cast = rsqrt(epsilon = var_643_epsilon_0_to_fp16, x = var_642_cast)[name = tensor("op_643_cast")]; tensor hidden_states_95_cast = mul(x = hidden_states_91_cast, y = var_643_cast)[name = tensor("hidden_states_95_cast")]; tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98885312)))]; tensor hidden_states_99_cast = mul(x = model_model_layers_3_input_layernorm_weight_to_fp16, y = hidden_states_95_cast)[name = tensor("hidden_states_99_cast")]; tensor model_model_layers_3_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98889472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100986688))), name = tensor("model_model_layers_3_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_21_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_99_cast)[name = tensor("linear_21_cast")]; tensor model_model_layers_3_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100986816))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101249024))), name = tensor("model_model_layers_3_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_22_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_3_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_99_cast)[name = tensor("linear_22_cast")]; tensor model_model_layers_3_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101249152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101511360))), name = tensor("model_model_layers_3_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_23_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_3_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_99_cast)[name = tensor("linear_23_cast")]; tensor var_663 = const()[name = tensor("op_663"), val = tensor([1, 128, 32, 64])]; tensor var_664_cast = reshape(shape = var_663, x = linear_21_cast)[name = tensor("op_664_cast")]; tensor q_7_perm_0 = const()[name = tensor("q_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_666 = const()[name = tensor("op_666"), val = tensor([1, 128, 4, 64])]; tensor var_667_cast = reshape(shape = var_666, x = linear_22_cast)[name = tensor("op_667_cast")]; tensor key_states_21_perm_0 = const()[name = tensor("key_states_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_669 = const()[name = tensor("op_669"), val = tensor([1, 128, 4, 64])]; tensor var_670_cast = reshape(shape = var_669, x = linear_23_cast)[name = tensor("op_670_cast")]; tensor hidden_states_105_perm_0 = const()[name = tensor("hidden_states_105_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_94 = transpose(perm = q_7_perm_0, x = var_664_cast)[name = tensor("transpose_94")]; tensor var_696_cast = mul(x = transpose_94, y = cos_1_to_fp16_palettized)[name = tensor("op_696_cast")]; tensor x1_13_begin_0 = const()[name = tensor("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = tensor("x1_13_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_13_end_mask_0 = const()[name = tensor("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13_cast = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = transpose_94)[name = tensor("x1_13_cast")]; tensor x2_13_begin_0 = const()[name = tensor("x2_13_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_13_end_0 = const()[name = tensor("x2_13_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_13_end_mask_0 = const()[name = tensor("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13_cast = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = transpose_94)[name = tensor("x2_13_cast")]; tensor const_62_promoted_to_fp16 = const()[name = tensor("const_62_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_707_cast = mul(x = x2_13_cast, y = const_62_promoted_to_fp16)[name = tensor("op_707_cast")]; tensor var_709_interleave_0 = const()[name = tensor("op_709_interleave_0"), val = tensor(false)]; tensor var_709_cast = concat(axis = var_10, interleave = var_709_interleave_0, values = (var_707_cast, x1_13_cast))[name = tensor("op_709_cast")]; tensor var_710_cast = mul(x = var_709_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_710_cast")]; tensor query_states_15_cast = add(x = var_696_cast, y = var_710_cast)[name = tensor("query_states_15_cast")]; tensor transpose_93 = transpose(perm = key_states_21_perm_0, x = var_667_cast)[name = tensor("transpose_93")]; tensor var_712_cast = mul(x = transpose_93, y = cos_1_to_fp16_palettized)[name = tensor("op_712_cast")]; tensor x1_15_begin_0 = const()[name = tensor("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = tensor("x1_15_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_15_end_mask_0 = const()[name = tensor("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15_cast = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = transpose_93)[name = tensor("x1_15_cast")]; tensor x2_15_begin_0 = const()[name = tensor("x2_15_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_15_end_0 = const()[name = tensor("x2_15_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_15_end_mask_0 = const()[name = tensor("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15_cast = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = transpose_93)[name = tensor("x2_15_cast")]; tensor const_65_promoted_to_fp16 = const()[name = tensor("const_65_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_723_cast = mul(x = x2_15_cast, y = const_65_promoted_to_fp16)[name = tensor("op_723_cast")]; tensor var_725_interleave_0 = const()[name = tensor("op_725_interleave_0"), val = tensor(false)]; tensor var_725_cast = concat(axis = var_10, interleave = var_725_interleave_0, values = (var_723_cast, x1_15_cast))[name = tensor("op_725_cast")]; tensor var_726_cast = mul(x = var_725_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_726_cast")]; tensor hidden_states_101_cast = add(x = var_712_cast, y = var_726_cast)[name = tensor("hidden_states_101_cast")]; tensor var_735_axes_0 = const()[name = tensor("op_735_axes_0"), val = tensor([2])]; tensor var_735_cast = expand_dims(axes = var_735_axes_0, x = hidden_states_101_cast)[name = tensor("op_735_cast")]; tensor hidden_states_103_reps_0 = const()[name = tensor("hidden_states_103_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_103_cast = tile(reps = hidden_states_103_reps_0, x = var_735_cast)[name = tensor("hidden_states_103_cast")]; tensor var_743 = const()[name = tensor("op_743"), val = tensor([1, 32, 128, 64])]; tensor key_states_23_cast = reshape(shape = var_743, x = hidden_states_103_cast)[name = tensor("key_states_23_cast")]; tensor var_752_axes_0 = const()[name = tensor("op_752_axes_0"), val = tensor([2])]; tensor transpose_92 = transpose(perm = hidden_states_105_perm_0, x = var_670_cast)[name = tensor("transpose_92")]; tensor var_752_cast = expand_dims(axes = var_752_axes_0, x = transpose_92)[name = tensor("op_752_cast")]; tensor hidden_states_107_reps_0 = const()[name = tensor("hidden_states_107_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_107_cast = tile(reps = hidden_states_107_reps_0, x = var_752_cast)[name = tensor("hidden_states_107_cast")]; tensor var_760 = const()[name = tensor("op_760"), val = tensor([1, 32, 128, 64])]; tensor value_states_15_cast = reshape(shape = var_760, x = hidden_states_107_cast)[name = tensor("value_states_15_cast")]; tensor var_762_perm_0 = const()[name = tensor("op_762_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_763_transpose_x_0 = const()[name = tensor("op_763_transpose_x_0"), val = tensor(false)]; tensor var_763_transpose_y_0 = const()[name = tensor("op_763_transpose_y_0"), val = tensor(false)]; tensor transpose_91 = transpose(perm = var_762_perm_0, x = key_states_23_cast)[name = tensor("transpose_91")]; tensor var_763_cast = matmul(transpose_x = var_763_transpose_x_0, transpose_y = var_763_transpose_y_0, x = query_states_15_cast, y = transpose_91)[name = tensor("op_763_cast")]; tensor _inversed_attn_weights_13_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_13_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_13_cast = mul(x = var_763_cast, y = _inversed_attn_weights_13_y_0_to_fp16)[name = tensor("_inversed_attn_weights_13_cast")]; tensor input_31_cast = add(x = _inversed_attn_weights_13_cast, y = attention_mask_cast)[name = tensor("input_31_cast")]; tensor var_767_cast = softmax(axis = var_10, x = input_31_cast)[name = tensor("op_767_cast")]; tensor attn_output_13_transpose_x_0 = const()[name = tensor("attn_output_13_transpose_x_0"), val = tensor(false)]; tensor attn_output_13_transpose_y_0 = const()[name = tensor("attn_output_13_transpose_y_0"), val = tensor(false)]; tensor attn_output_13_cast = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = var_767_cast, y = value_states_15_cast)[name = tensor("attn_output_13_cast")]; tensor var_770_perm_0 = const()[name = tensor("op_770_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_772 = const()[name = tensor("op_772"), val = tensor([1, 128, 2048])]; tensor transpose_90 = transpose(perm = var_770_perm_0, x = attn_output_13_cast)[name = tensor("transpose_90")]; tensor input_33_cast = reshape(shape = var_772, x = transpose_90)[name = tensor("input_33_cast")]; tensor model_model_layers_3_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101511488))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103608704))), name = tensor("model_model_layers_3_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_24_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_to_fp16_palettized, x = input_33_cast)[name = tensor("linear_24_cast")]; tensor hidden_states_111_cast = add(x = hidden_states_91_cast, y = linear_24_cast)[name = tensor("hidden_states_111_cast")]; tensor var_13_promoted_to_fp16_7 = const()[name = tensor("op_13_promoted_to_fp16_7"), val = tensor(0x1p+1)]; tensor var_779_cast = pow(x = hidden_states_111_cast, y = var_13_promoted_to_fp16_7)[name = tensor("op_779_cast")]; tensor var_780 = const()[name = tensor("op_780"), val = tensor([-1])]; tensor variance_15_cast = reduce_mean(axes = var_780, keep_dims = var_23, x = var_779_cast)[name = tensor("variance_15_cast")]; tensor var_782_to_fp16 = const()[name = tensor("op_782_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_783_cast = add(x = variance_15_cast, y = var_782_to_fp16)[name = tensor("op_783_cast")]; tensor var_784_epsilon_0_to_fp16 = const()[name = tensor("op_784_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_784_cast = rsqrt(epsilon = var_784_epsilon_0_to_fp16, x = var_783_cast)[name = tensor("op_784_cast")]; tensor hidden_states_115_cast = mul(x = hidden_states_111_cast, y = var_784_cast)[name = tensor("hidden_states_115_cast")]; tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103608832)))]; tensor input_35_cast = mul(x = model_model_layers_3_post_attention_layernorm_weight_to_fp16, y = hidden_states_115_cast)[name = tensor("input_35_cast")]; tensor model_model_layers_3_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103612992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109380224))), name = tensor("model_model_layers_3_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_25_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_3_mlp_gate_proj_weight_to_fp16_palettized, x = input_35_cast)[name = tensor("linear_25_cast")]; tensor var_796_cast = silu(x = linear_25_cast)[name = tensor("op_796_cast")]; tensor model_model_layers_3_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109380352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115147584))), name = tensor("model_model_layers_3_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_26_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_3_mlp_up_proj_weight_to_fp16_palettized, x = input_35_cast)[name = tensor("linear_26_cast")]; tensor input_39_cast = mul(x = var_796_cast, y = linear_26_cast)[name = tensor("input_39_cast")]; tensor model_model_layers_3_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115147712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120914944))), name = tensor("model_model_layers_3_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_27_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_mlp_down_proj_weight_to_fp16_palettized, x = input_39_cast)[name = tensor("linear_27_cast")]; tensor hidden_states_121_cast = add(x = hidden_states_111_cast, y = linear_27_cast)[name = tensor("hidden_states_121_cast")]; tensor var_13_promoted_to_fp16_8 = const()[name = tensor("op_13_promoted_to_fp16_8"), val = tensor(0x1p+1)]; tensor var_809_cast = pow(x = hidden_states_121_cast, y = var_13_promoted_to_fp16_8)[name = tensor("op_809_cast")]; tensor var_810 = const()[name = tensor("op_810"), val = tensor([-1])]; tensor variance_17_cast = reduce_mean(axes = var_810, keep_dims = var_23, x = var_809_cast)[name = tensor("variance_17_cast")]; tensor var_812_to_fp16 = const()[name = tensor("op_812_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_813_cast = add(x = variance_17_cast, y = var_812_to_fp16)[name = tensor("op_813_cast")]; tensor var_814_epsilon_0_to_fp16 = const()[name = tensor("op_814_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_814_cast = rsqrt(epsilon = var_814_epsilon_0_to_fp16, x = var_813_cast)[name = tensor("op_814_cast")]; tensor hidden_states_125_cast = mul(x = hidden_states_121_cast, y = var_814_cast)[name = tensor("hidden_states_125_cast")]; tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120915072)))]; tensor hidden_states_129_cast = mul(x = model_model_layers_4_input_layernorm_weight_to_fp16, y = hidden_states_125_cast)[name = tensor("hidden_states_129_cast")]; tensor model_model_layers_4_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120919232))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123016448))), name = tensor("model_model_layers_4_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_28_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_129_cast)[name = tensor("linear_28_cast")]; tensor model_model_layers_4_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123016576))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123278784))), name = tensor("model_model_layers_4_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_29_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_4_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_129_cast)[name = tensor("linear_29_cast")]; tensor model_model_layers_4_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123278912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123541120))), name = tensor("model_model_layers_4_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_30_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_4_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_129_cast)[name = tensor("linear_30_cast")]; tensor var_834 = const()[name = tensor("op_834"), val = tensor([1, 128, 32, 64])]; tensor var_835_cast = reshape(shape = var_834, x = linear_28_cast)[name = tensor("op_835_cast")]; tensor q_9_perm_0 = const()[name = tensor("q_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_837 = const()[name = tensor("op_837"), val = tensor([1, 128, 4, 64])]; tensor var_838_cast = reshape(shape = var_837, x = linear_29_cast)[name = tensor("op_838_cast")]; tensor key_states_27_perm_0 = const()[name = tensor("key_states_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 128, 4, 64])]; tensor var_841_cast = reshape(shape = var_840, x = linear_30_cast)[name = tensor("op_841_cast")]; tensor hidden_states_135_perm_0 = const()[name = tensor("hidden_states_135_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_89 = transpose(perm = q_9_perm_0, x = var_835_cast)[name = tensor("transpose_89")]; tensor var_867_cast = mul(x = transpose_89, y = cos_1_to_fp16_palettized)[name = tensor("op_867_cast")]; tensor x1_17_begin_0 = const()[name = tensor("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = tensor("x1_17_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_17_end_mask_0 = const()[name = tensor("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17_cast = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = transpose_89)[name = tensor("x1_17_cast")]; tensor x2_17_begin_0 = const()[name = tensor("x2_17_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_17_end_0 = const()[name = tensor("x2_17_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_17_end_mask_0 = const()[name = tensor("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17_cast = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = transpose_89)[name = tensor("x2_17_cast")]; tensor const_79_promoted_to_fp16 = const()[name = tensor("const_79_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_878_cast = mul(x = x2_17_cast, y = const_79_promoted_to_fp16)[name = tensor("op_878_cast")]; tensor var_880_interleave_0 = const()[name = tensor("op_880_interleave_0"), val = tensor(false)]; tensor var_880_cast = concat(axis = var_10, interleave = var_880_interleave_0, values = (var_878_cast, x1_17_cast))[name = tensor("op_880_cast")]; tensor var_881_cast = mul(x = var_880_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_881_cast")]; tensor query_states_19_cast = add(x = var_867_cast, y = var_881_cast)[name = tensor("query_states_19_cast")]; tensor transpose_88 = transpose(perm = key_states_27_perm_0, x = var_838_cast)[name = tensor("transpose_88")]; tensor var_883_cast = mul(x = transpose_88, y = cos_1_to_fp16_palettized)[name = tensor("op_883_cast")]; tensor x1_19_begin_0 = const()[name = tensor("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = tensor("x1_19_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_19_end_mask_0 = const()[name = tensor("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19_cast = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = transpose_88)[name = tensor("x1_19_cast")]; tensor x2_19_begin_0 = const()[name = tensor("x2_19_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_19_end_0 = const()[name = tensor("x2_19_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_19_end_mask_0 = const()[name = tensor("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19_cast = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = transpose_88)[name = tensor("x2_19_cast")]; tensor const_82_promoted_to_fp16 = const()[name = tensor("const_82_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_894_cast = mul(x = x2_19_cast, y = const_82_promoted_to_fp16)[name = tensor("op_894_cast")]; tensor var_896_interleave_0 = const()[name = tensor("op_896_interleave_0"), val = tensor(false)]; tensor var_896_cast = concat(axis = var_10, interleave = var_896_interleave_0, values = (var_894_cast, x1_19_cast))[name = tensor("op_896_cast")]; tensor var_897_cast = mul(x = var_896_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_897_cast")]; tensor hidden_states_131_cast = add(x = var_883_cast, y = var_897_cast)[name = tensor("hidden_states_131_cast")]; tensor var_906_axes_0 = const()[name = tensor("op_906_axes_0"), val = tensor([2])]; tensor var_906_cast = expand_dims(axes = var_906_axes_0, x = hidden_states_131_cast)[name = tensor("op_906_cast")]; tensor hidden_states_133_reps_0 = const()[name = tensor("hidden_states_133_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_133_cast = tile(reps = hidden_states_133_reps_0, x = var_906_cast)[name = tensor("hidden_states_133_cast")]; tensor var_914 = const()[name = tensor("op_914"), val = tensor([1, 32, 128, 64])]; tensor key_states_29_cast = reshape(shape = var_914, x = hidden_states_133_cast)[name = tensor("key_states_29_cast")]; tensor var_923_axes_0 = const()[name = tensor("op_923_axes_0"), val = tensor([2])]; tensor transpose_87 = transpose(perm = hidden_states_135_perm_0, x = var_841_cast)[name = tensor("transpose_87")]; tensor var_923_cast = expand_dims(axes = var_923_axes_0, x = transpose_87)[name = tensor("op_923_cast")]; tensor hidden_states_137_reps_0 = const()[name = tensor("hidden_states_137_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_137_cast = tile(reps = hidden_states_137_reps_0, x = var_923_cast)[name = tensor("hidden_states_137_cast")]; tensor var_931 = const()[name = tensor("op_931"), val = tensor([1, 32, 128, 64])]; tensor value_states_19_cast = reshape(shape = var_931, x = hidden_states_137_cast)[name = tensor("value_states_19_cast")]; tensor var_933_perm_0 = const()[name = tensor("op_933_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_934_transpose_x_0 = const()[name = tensor("op_934_transpose_x_0"), val = tensor(false)]; tensor var_934_transpose_y_0 = const()[name = tensor("op_934_transpose_y_0"), val = tensor(false)]; tensor transpose_86 = transpose(perm = var_933_perm_0, x = key_states_29_cast)[name = tensor("transpose_86")]; tensor var_934_cast = matmul(transpose_x = var_934_transpose_x_0, transpose_y = var_934_transpose_y_0, x = query_states_19_cast, y = transpose_86)[name = tensor("op_934_cast")]; tensor _inversed_attn_weights_17_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_17_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_17_cast = mul(x = var_934_cast, y = _inversed_attn_weights_17_y_0_to_fp16)[name = tensor("_inversed_attn_weights_17_cast")]; tensor input_41_cast = add(x = _inversed_attn_weights_17_cast, y = attention_mask_cast)[name = tensor("input_41_cast")]; tensor var_938_cast = softmax(axis = var_10, x = input_41_cast)[name = tensor("op_938_cast")]; tensor attn_output_17_transpose_x_0 = const()[name = tensor("attn_output_17_transpose_x_0"), val = tensor(false)]; tensor attn_output_17_transpose_y_0 = const()[name = tensor("attn_output_17_transpose_y_0"), val = tensor(false)]; tensor attn_output_17_cast = matmul(transpose_x = attn_output_17_transpose_x_0, transpose_y = attn_output_17_transpose_y_0, x = var_938_cast, y = value_states_19_cast)[name = tensor("attn_output_17_cast")]; tensor var_941_perm_0 = const()[name = tensor("op_941_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_943 = const()[name = tensor("op_943"), val = tensor([1, 128, 2048])]; tensor transpose_85 = transpose(perm = var_941_perm_0, x = attn_output_17_cast)[name = tensor("transpose_85")]; tensor input_43_cast = reshape(shape = var_943, x = transpose_85)[name = tensor("input_43_cast")]; tensor model_model_layers_4_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123541248))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125638464))), name = tensor("model_model_layers_4_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_31_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_to_fp16_palettized, x = input_43_cast)[name = tensor("linear_31_cast")]; tensor hidden_states_141_cast = add(x = hidden_states_121_cast, y = linear_31_cast)[name = tensor("hidden_states_141_cast")]; tensor var_13_promoted_to_fp16_9 = const()[name = tensor("op_13_promoted_to_fp16_9"), val = tensor(0x1p+1)]; tensor var_950_cast = pow(x = hidden_states_141_cast, y = var_13_promoted_to_fp16_9)[name = tensor("op_950_cast")]; tensor var_951 = const()[name = tensor("op_951"), val = tensor([-1])]; tensor variance_19_cast = reduce_mean(axes = var_951, keep_dims = var_23, x = var_950_cast)[name = tensor("variance_19_cast")]; tensor var_953_to_fp16 = const()[name = tensor("op_953_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_954_cast = add(x = variance_19_cast, y = var_953_to_fp16)[name = tensor("op_954_cast")]; tensor var_955_epsilon_0_to_fp16 = const()[name = tensor("op_955_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_955_cast = rsqrt(epsilon = var_955_epsilon_0_to_fp16, x = var_954_cast)[name = tensor("op_955_cast")]; tensor hidden_states_145_cast = mul(x = hidden_states_141_cast, y = var_955_cast)[name = tensor("hidden_states_145_cast")]; tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125638592)))]; tensor input_45_cast = mul(x = model_model_layers_4_post_attention_layernorm_weight_to_fp16, y = hidden_states_145_cast)[name = tensor("input_45_cast")]; tensor model_model_layers_4_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125642752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131409984))), name = tensor("model_model_layers_4_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_32_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_4_mlp_gate_proj_weight_to_fp16_palettized, x = input_45_cast)[name = tensor("linear_32_cast")]; tensor var_967_cast = silu(x = linear_32_cast)[name = tensor("op_967_cast")]; tensor model_model_layers_4_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131410112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137177344))), name = tensor("model_model_layers_4_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_33_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_4_mlp_up_proj_weight_to_fp16_palettized, x = input_45_cast)[name = tensor("linear_33_cast")]; tensor input_49_cast = mul(x = var_967_cast, y = linear_33_cast)[name = tensor("input_49_cast")]; tensor model_model_layers_4_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137177472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142944704))), name = tensor("model_model_layers_4_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_34_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_mlp_down_proj_weight_to_fp16_palettized, x = input_49_cast)[name = tensor("linear_34_cast")]; tensor hidden_states_151_cast = add(x = hidden_states_141_cast, y = linear_34_cast)[name = tensor("hidden_states_151_cast")]; tensor var_13_promoted_to_fp16_10 = const()[name = tensor("op_13_promoted_to_fp16_10"), val = tensor(0x1p+1)]; tensor var_980_cast = pow(x = hidden_states_151_cast, y = var_13_promoted_to_fp16_10)[name = tensor("op_980_cast")]; tensor var_981 = const()[name = tensor("op_981"), val = tensor([-1])]; tensor variance_21_cast = reduce_mean(axes = var_981, keep_dims = var_23, x = var_980_cast)[name = tensor("variance_21_cast")]; tensor var_983_to_fp16 = const()[name = tensor("op_983_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_984_cast = add(x = variance_21_cast, y = var_983_to_fp16)[name = tensor("op_984_cast")]; tensor var_985_epsilon_0_to_fp16 = const()[name = tensor("op_985_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_985_cast = rsqrt(epsilon = var_985_epsilon_0_to_fp16, x = var_984_cast)[name = tensor("op_985_cast")]; tensor hidden_states_155_cast = mul(x = hidden_states_151_cast, y = var_985_cast)[name = tensor("hidden_states_155_cast")]; tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142944832)))]; tensor hidden_states_159_cast = mul(x = model_model_layers_5_input_layernorm_weight_to_fp16, y = hidden_states_155_cast)[name = tensor("hidden_states_159_cast")]; tensor model_model_layers_5_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142948992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145046208))), name = tensor("model_model_layers_5_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_35_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_159_cast)[name = tensor("linear_35_cast")]; tensor model_model_layers_5_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145046336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145308544))), name = tensor("model_model_layers_5_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_36_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_5_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_159_cast)[name = tensor("linear_36_cast")]; tensor model_model_layers_5_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145308672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145570880))), name = tensor("model_model_layers_5_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_37_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_5_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_159_cast)[name = tensor("linear_37_cast")]; tensor var_1005 = const()[name = tensor("op_1005"), val = tensor([1, 128, 32, 64])]; tensor var_1006_cast = reshape(shape = var_1005, x = linear_35_cast)[name = tensor("op_1006_cast")]; tensor q_11_perm_0 = const()[name = tensor("q_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1008 = const()[name = tensor("op_1008"), val = tensor([1, 128, 4, 64])]; tensor var_1009_cast = reshape(shape = var_1008, x = linear_36_cast)[name = tensor("op_1009_cast")]; tensor key_states_33_perm_0 = const()[name = tensor("key_states_33_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1011 = const()[name = tensor("op_1011"), val = tensor([1, 128, 4, 64])]; tensor var_1012_cast = reshape(shape = var_1011, x = linear_37_cast)[name = tensor("op_1012_cast")]; tensor hidden_states_165_perm_0 = const()[name = tensor("hidden_states_165_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_84 = transpose(perm = q_11_perm_0, x = var_1006_cast)[name = tensor("transpose_84")]; tensor var_1038_cast = mul(x = transpose_84, y = cos_1_to_fp16_palettized)[name = tensor("op_1038_cast")]; tensor x1_21_begin_0 = const()[name = tensor("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = tensor("x1_21_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_21_end_mask_0 = const()[name = tensor("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21_cast = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = transpose_84)[name = tensor("x1_21_cast")]; tensor x2_21_begin_0 = const()[name = tensor("x2_21_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_21_end_0 = const()[name = tensor("x2_21_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_21_end_mask_0 = const()[name = tensor("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21_cast = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = transpose_84)[name = tensor("x2_21_cast")]; tensor const_96_promoted_to_fp16 = const()[name = tensor("const_96_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_1049_cast = mul(x = x2_21_cast, y = const_96_promoted_to_fp16)[name = tensor("op_1049_cast")]; tensor var_1051_interleave_0 = const()[name = tensor("op_1051_interleave_0"), val = tensor(false)]; tensor var_1051_cast = concat(axis = var_10, interleave = var_1051_interleave_0, values = (var_1049_cast, x1_21_cast))[name = tensor("op_1051_cast")]; tensor var_1052_cast = mul(x = var_1051_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_1052_cast")]; tensor query_states_23_cast = add(x = var_1038_cast, y = var_1052_cast)[name = tensor("query_states_23_cast")]; tensor transpose_83 = transpose(perm = key_states_33_perm_0, x = var_1009_cast)[name = tensor("transpose_83")]; tensor var_1054_cast = mul(x = transpose_83, y = cos_1_to_fp16_palettized)[name = tensor("op_1054_cast")]; tensor x1_23_begin_0 = const()[name = tensor("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = tensor("x1_23_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_23_end_mask_0 = const()[name = tensor("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23_cast = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = transpose_83)[name = tensor("x1_23_cast")]; tensor x2_23_begin_0 = const()[name = tensor("x2_23_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_23_end_0 = const()[name = tensor("x2_23_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_23_end_mask_0 = const()[name = tensor("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23_cast = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = transpose_83)[name = tensor("x2_23_cast")]; tensor const_99_promoted_to_fp16 = const()[name = tensor("const_99_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_1065_cast = mul(x = x2_23_cast, y = const_99_promoted_to_fp16)[name = tensor("op_1065_cast")]; tensor var_1067_interleave_0 = const()[name = tensor("op_1067_interleave_0"), val = tensor(false)]; tensor var_1067_cast = concat(axis = var_10, interleave = var_1067_interleave_0, values = (var_1065_cast, x1_23_cast))[name = tensor("op_1067_cast")]; tensor var_1068_cast = mul(x = var_1067_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_1068_cast")]; tensor hidden_states_161_cast = add(x = var_1054_cast, y = var_1068_cast)[name = tensor("hidden_states_161_cast")]; tensor var_1077_axes_0 = const()[name = tensor("op_1077_axes_0"), val = tensor([2])]; tensor var_1077_cast = expand_dims(axes = var_1077_axes_0, x = hidden_states_161_cast)[name = tensor("op_1077_cast")]; tensor hidden_states_163_reps_0 = const()[name = tensor("hidden_states_163_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_163_cast = tile(reps = hidden_states_163_reps_0, x = var_1077_cast)[name = tensor("hidden_states_163_cast")]; tensor var_1085 = const()[name = tensor("op_1085"), val = tensor([1, 32, 128, 64])]; tensor key_states_35_cast = reshape(shape = var_1085, x = hidden_states_163_cast)[name = tensor("key_states_35_cast")]; tensor var_1094_axes_0 = const()[name = tensor("op_1094_axes_0"), val = tensor([2])]; tensor transpose_82 = transpose(perm = hidden_states_165_perm_0, x = var_1012_cast)[name = tensor("transpose_82")]; tensor var_1094_cast = expand_dims(axes = var_1094_axes_0, x = transpose_82)[name = tensor("op_1094_cast")]; tensor hidden_states_167_reps_0 = const()[name = tensor("hidden_states_167_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_167_cast = tile(reps = hidden_states_167_reps_0, x = var_1094_cast)[name = tensor("hidden_states_167_cast")]; tensor var_1102 = const()[name = tensor("op_1102"), val = tensor([1, 32, 128, 64])]; tensor value_states_23_cast = reshape(shape = var_1102, x = hidden_states_167_cast)[name = tensor("value_states_23_cast")]; tensor var_1104_perm_0 = const()[name = tensor("op_1104_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1105_transpose_x_0 = const()[name = tensor("op_1105_transpose_x_0"), val = tensor(false)]; tensor var_1105_transpose_y_0 = const()[name = tensor("op_1105_transpose_y_0"), val = tensor(false)]; tensor transpose_81 = transpose(perm = var_1104_perm_0, x = key_states_35_cast)[name = tensor("transpose_81")]; tensor var_1105_cast = matmul(transpose_x = var_1105_transpose_x_0, transpose_y = var_1105_transpose_y_0, x = query_states_23_cast, y = transpose_81)[name = tensor("op_1105_cast")]; tensor _inversed_attn_weights_21_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_21_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_21_cast = mul(x = var_1105_cast, y = _inversed_attn_weights_21_y_0_to_fp16)[name = tensor("_inversed_attn_weights_21_cast")]; tensor input_51_cast = add(x = _inversed_attn_weights_21_cast, y = attention_mask_cast)[name = tensor("input_51_cast")]; tensor var_1109_cast = softmax(axis = var_10, x = input_51_cast)[name = tensor("op_1109_cast")]; tensor attn_output_21_transpose_x_0 = const()[name = tensor("attn_output_21_transpose_x_0"), val = tensor(false)]; tensor attn_output_21_transpose_y_0 = const()[name = tensor("attn_output_21_transpose_y_0"), val = tensor(false)]; tensor attn_output_21_cast = matmul(transpose_x = attn_output_21_transpose_x_0, transpose_y = attn_output_21_transpose_y_0, x = var_1109_cast, y = value_states_23_cast)[name = tensor("attn_output_21_cast")]; tensor var_1112_perm_0 = const()[name = tensor("op_1112_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1114 = const()[name = tensor("op_1114"), val = tensor([1, 128, 2048])]; tensor transpose_80 = transpose(perm = var_1112_perm_0, x = attn_output_21_cast)[name = tensor("transpose_80")]; tensor input_53_cast = reshape(shape = var_1114, x = transpose_80)[name = tensor("input_53_cast")]; tensor model_model_layers_5_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145571008))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147668224))), name = tensor("model_model_layers_5_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_38_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_to_fp16_palettized, x = input_53_cast)[name = tensor("linear_38_cast")]; tensor hidden_states_171_cast = add(x = hidden_states_151_cast, y = linear_38_cast)[name = tensor("hidden_states_171_cast")]; tensor var_13_promoted_to_fp16_11 = const()[name = tensor("op_13_promoted_to_fp16_11"), val = tensor(0x1p+1)]; tensor var_1121_cast = pow(x = hidden_states_171_cast, y = var_13_promoted_to_fp16_11)[name = tensor("op_1121_cast")]; tensor var_1122 = const()[name = tensor("op_1122"), val = tensor([-1])]; tensor variance_23_cast = reduce_mean(axes = var_1122, keep_dims = var_23, x = var_1121_cast)[name = tensor("variance_23_cast")]; tensor var_1124_to_fp16 = const()[name = tensor("op_1124_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_1125_cast = add(x = variance_23_cast, y = var_1124_to_fp16)[name = tensor("op_1125_cast")]; tensor var_1126_epsilon_0_to_fp16 = const()[name = tensor("op_1126_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_1126_cast = rsqrt(epsilon = var_1126_epsilon_0_to_fp16, x = var_1125_cast)[name = tensor("op_1126_cast")]; tensor hidden_states_175_cast = mul(x = hidden_states_171_cast, y = var_1126_cast)[name = tensor("hidden_states_175_cast")]; tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147668352)))]; tensor input_55_cast = mul(x = model_model_layers_5_post_attention_layernorm_weight_to_fp16, y = hidden_states_175_cast)[name = tensor("input_55_cast")]; tensor model_model_layers_5_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147672512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153439744))), name = tensor("model_model_layers_5_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_39_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_5_mlp_gate_proj_weight_to_fp16_palettized, x = input_55_cast)[name = tensor("linear_39_cast")]; tensor var_1138_cast = silu(x = linear_39_cast)[name = tensor("op_1138_cast")]; tensor model_model_layers_5_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153439872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159207104))), name = tensor("model_model_layers_5_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_40_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_5_mlp_up_proj_weight_to_fp16_palettized, x = input_55_cast)[name = tensor("linear_40_cast")]; tensor input_59_cast = mul(x = var_1138_cast, y = linear_40_cast)[name = tensor("input_59_cast")]; tensor model_model_layers_5_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159207232))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164974464))), name = tensor("model_model_layers_5_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_41_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_mlp_down_proj_weight_to_fp16_palettized, x = input_59_cast)[name = tensor("linear_41_cast")]; tensor hidden_states_181_cast = add(x = hidden_states_171_cast, y = linear_41_cast)[name = tensor("hidden_states_181_cast")]; tensor var_13_promoted_to_fp16_12 = const()[name = tensor("op_13_promoted_to_fp16_12"), val = tensor(0x1p+1)]; tensor var_1151_cast = pow(x = hidden_states_181_cast, y = var_13_promoted_to_fp16_12)[name = tensor("op_1151_cast")]; tensor var_1152 = const()[name = tensor("op_1152"), val = tensor([-1])]; tensor variance_25_cast = reduce_mean(axes = var_1152, keep_dims = var_23, x = var_1151_cast)[name = tensor("variance_25_cast")]; tensor var_1154_to_fp16 = const()[name = tensor("op_1154_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_1155_cast = add(x = variance_25_cast, y = var_1154_to_fp16)[name = tensor("op_1155_cast")]; tensor var_1156_epsilon_0_to_fp16 = const()[name = tensor("op_1156_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_1156_cast = rsqrt(epsilon = var_1156_epsilon_0_to_fp16, x = var_1155_cast)[name = tensor("op_1156_cast")]; tensor hidden_states_185_cast = mul(x = hidden_states_181_cast, y = var_1156_cast)[name = tensor("hidden_states_185_cast")]; tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164974592)))]; tensor hidden_states_189_cast = mul(x = model_model_layers_6_input_layernorm_weight_to_fp16, y = hidden_states_185_cast)[name = tensor("hidden_states_189_cast")]; tensor model_model_layers_6_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164978752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167075968))), name = tensor("model_model_layers_6_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_42_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_189_cast)[name = tensor("linear_42_cast")]; tensor model_model_layers_6_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167076096))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167338304))), name = tensor("model_model_layers_6_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_43_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_6_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_189_cast)[name = tensor("linear_43_cast")]; tensor model_model_layers_6_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167338432))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167600640))), name = tensor("model_model_layers_6_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_44_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_6_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_189_cast)[name = tensor("linear_44_cast")]; tensor var_1176 = const()[name = tensor("op_1176"), val = tensor([1, 128, 32, 64])]; tensor var_1177_cast = reshape(shape = var_1176, x = linear_42_cast)[name = tensor("op_1177_cast")]; tensor q_13_perm_0 = const()[name = tensor("q_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1179 = const()[name = tensor("op_1179"), val = tensor([1, 128, 4, 64])]; tensor var_1180_cast = reshape(shape = var_1179, x = linear_43_cast)[name = tensor("op_1180_cast")]; tensor key_states_39_perm_0 = const()[name = tensor("key_states_39_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1182 = const()[name = tensor("op_1182"), val = tensor([1, 128, 4, 64])]; tensor var_1183_cast = reshape(shape = var_1182, x = linear_44_cast)[name = tensor("op_1183_cast")]; tensor hidden_states_195_perm_0 = const()[name = tensor("hidden_states_195_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_79 = transpose(perm = q_13_perm_0, x = var_1177_cast)[name = tensor("transpose_79")]; tensor var_1209_cast = mul(x = transpose_79, y = cos_1_to_fp16_palettized)[name = tensor("op_1209_cast")]; tensor x1_25_begin_0 = const()[name = tensor("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = tensor("x1_25_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_25_end_mask_0 = const()[name = tensor("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25_cast = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = transpose_79)[name = tensor("x1_25_cast")]; tensor x2_25_begin_0 = const()[name = tensor("x2_25_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_25_end_0 = const()[name = tensor("x2_25_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_25_end_mask_0 = const()[name = tensor("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25_cast = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = transpose_79)[name = tensor("x2_25_cast")]; tensor const_113_promoted_to_fp16 = const()[name = tensor("const_113_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_1220_cast = mul(x = x2_25_cast, y = const_113_promoted_to_fp16)[name = tensor("op_1220_cast")]; tensor var_1222_interleave_0 = const()[name = tensor("op_1222_interleave_0"), val = tensor(false)]; tensor var_1222_cast = concat(axis = var_10, interleave = var_1222_interleave_0, values = (var_1220_cast, x1_25_cast))[name = tensor("op_1222_cast")]; tensor var_1223_cast = mul(x = var_1222_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_1223_cast")]; tensor query_states_27_cast = add(x = var_1209_cast, y = var_1223_cast)[name = tensor("query_states_27_cast")]; tensor transpose_78 = transpose(perm = key_states_39_perm_0, x = var_1180_cast)[name = tensor("transpose_78")]; tensor var_1225_cast = mul(x = transpose_78, y = cos_1_to_fp16_palettized)[name = tensor("op_1225_cast")]; tensor x1_27_begin_0 = const()[name = tensor("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = tensor("x1_27_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_27_end_mask_0 = const()[name = tensor("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27_cast = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = transpose_78)[name = tensor("x1_27_cast")]; tensor x2_27_begin_0 = const()[name = tensor("x2_27_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_27_end_0 = const()[name = tensor("x2_27_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_27_end_mask_0 = const()[name = tensor("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27_cast = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = transpose_78)[name = tensor("x2_27_cast")]; tensor const_116_promoted_to_fp16 = const()[name = tensor("const_116_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_1236_cast = mul(x = x2_27_cast, y = const_116_promoted_to_fp16)[name = tensor("op_1236_cast")]; tensor var_1238_interleave_0 = const()[name = tensor("op_1238_interleave_0"), val = tensor(false)]; tensor var_1238_cast = concat(axis = var_10, interleave = var_1238_interleave_0, values = (var_1236_cast, x1_27_cast))[name = tensor("op_1238_cast")]; tensor var_1239_cast = mul(x = var_1238_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_1239_cast")]; tensor hidden_states_191_cast = add(x = var_1225_cast, y = var_1239_cast)[name = tensor("hidden_states_191_cast")]; tensor var_1248_axes_0 = const()[name = tensor("op_1248_axes_0"), val = tensor([2])]; tensor var_1248_cast = expand_dims(axes = var_1248_axes_0, x = hidden_states_191_cast)[name = tensor("op_1248_cast")]; tensor hidden_states_193_reps_0 = const()[name = tensor("hidden_states_193_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_193_cast = tile(reps = hidden_states_193_reps_0, x = var_1248_cast)[name = tensor("hidden_states_193_cast")]; tensor var_1256 = const()[name = tensor("op_1256"), val = tensor([1, 32, 128, 64])]; tensor key_states_41_cast = reshape(shape = var_1256, x = hidden_states_193_cast)[name = tensor("key_states_41_cast")]; tensor var_1265_axes_0 = const()[name = tensor("op_1265_axes_0"), val = tensor([2])]; tensor transpose_77 = transpose(perm = hidden_states_195_perm_0, x = var_1183_cast)[name = tensor("transpose_77")]; tensor var_1265_cast = expand_dims(axes = var_1265_axes_0, x = transpose_77)[name = tensor("op_1265_cast")]; tensor hidden_states_197_reps_0 = const()[name = tensor("hidden_states_197_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_197_cast = tile(reps = hidden_states_197_reps_0, x = var_1265_cast)[name = tensor("hidden_states_197_cast")]; tensor var_1273 = const()[name = tensor("op_1273"), val = tensor([1, 32, 128, 64])]; tensor value_states_27_cast = reshape(shape = var_1273, x = hidden_states_197_cast)[name = tensor("value_states_27_cast")]; tensor var_1275_perm_0 = const()[name = tensor("op_1275_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1276_transpose_x_0 = const()[name = tensor("op_1276_transpose_x_0"), val = tensor(false)]; tensor var_1276_transpose_y_0 = const()[name = tensor("op_1276_transpose_y_0"), val = tensor(false)]; tensor transpose_76 = transpose(perm = var_1275_perm_0, x = key_states_41_cast)[name = tensor("transpose_76")]; tensor var_1276_cast = matmul(transpose_x = var_1276_transpose_x_0, transpose_y = var_1276_transpose_y_0, x = query_states_27_cast, y = transpose_76)[name = tensor("op_1276_cast")]; tensor _inversed_attn_weights_25_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_25_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_25_cast = mul(x = var_1276_cast, y = _inversed_attn_weights_25_y_0_to_fp16)[name = tensor("_inversed_attn_weights_25_cast")]; tensor input_61_cast = add(x = _inversed_attn_weights_25_cast, y = attention_mask_cast)[name = tensor("input_61_cast")]; tensor var_1280_cast = softmax(axis = var_10, x = input_61_cast)[name = tensor("op_1280_cast")]; tensor attn_output_25_transpose_x_0 = const()[name = tensor("attn_output_25_transpose_x_0"), val = tensor(false)]; tensor attn_output_25_transpose_y_0 = const()[name = tensor("attn_output_25_transpose_y_0"), val = tensor(false)]; tensor attn_output_25_cast = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = var_1280_cast, y = value_states_27_cast)[name = tensor("attn_output_25_cast")]; tensor var_1283_perm_0 = const()[name = tensor("op_1283_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1285 = const()[name = tensor("op_1285"), val = tensor([1, 128, 2048])]; tensor transpose_75 = transpose(perm = var_1283_perm_0, x = attn_output_25_cast)[name = tensor("transpose_75")]; tensor input_63_cast = reshape(shape = var_1285, x = transpose_75)[name = tensor("input_63_cast")]; tensor model_model_layers_6_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167600768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169697984))), name = tensor("model_model_layers_6_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_45_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_to_fp16_palettized, x = input_63_cast)[name = tensor("linear_45_cast")]; tensor hidden_states_201_cast = add(x = hidden_states_181_cast, y = linear_45_cast)[name = tensor("hidden_states_201_cast")]; tensor var_13_promoted_to_fp16_13 = const()[name = tensor("op_13_promoted_to_fp16_13"), val = tensor(0x1p+1)]; tensor var_1292_cast = pow(x = hidden_states_201_cast, y = var_13_promoted_to_fp16_13)[name = tensor("op_1292_cast")]; tensor var_1293 = const()[name = tensor("op_1293"), val = tensor([-1])]; tensor variance_27_cast = reduce_mean(axes = var_1293, keep_dims = var_23, x = var_1292_cast)[name = tensor("variance_27_cast")]; tensor var_1295_to_fp16 = const()[name = tensor("op_1295_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_1296_cast = add(x = variance_27_cast, y = var_1295_to_fp16)[name = tensor("op_1296_cast")]; tensor var_1297_epsilon_0_to_fp16 = const()[name = tensor("op_1297_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_1297_cast = rsqrt(epsilon = var_1297_epsilon_0_to_fp16, x = var_1296_cast)[name = tensor("op_1297_cast")]; tensor hidden_states_205_cast = mul(x = hidden_states_201_cast, y = var_1297_cast)[name = tensor("hidden_states_205_cast")]; tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169698112)))]; tensor input_65_cast = mul(x = model_model_layers_6_post_attention_layernorm_weight_to_fp16, y = hidden_states_205_cast)[name = tensor("input_65_cast")]; tensor model_model_layers_6_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169702272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175469504))), name = tensor("model_model_layers_6_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_46_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_6_mlp_gate_proj_weight_to_fp16_palettized, x = input_65_cast)[name = tensor("linear_46_cast")]; tensor var_1309_cast = silu(x = linear_46_cast)[name = tensor("op_1309_cast")]; tensor model_model_layers_6_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(175469632))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(181236864))), name = tensor("model_model_layers_6_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_47_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_6_mlp_up_proj_weight_to_fp16_palettized, x = input_65_cast)[name = tensor("linear_47_cast")]; tensor input_69_cast = mul(x = var_1309_cast, y = linear_47_cast)[name = tensor("input_69_cast")]; tensor model_model_layers_6_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(181236992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187004224))), name = tensor("model_model_layers_6_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_48_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_mlp_down_proj_weight_to_fp16_palettized, x = input_69_cast)[name = tensor("linear_48_cast")]; tensor hidden_states_211_cast = add(x = hidden_states_201_cast, y = linear_48_cast)[name = tensor("hidden_states_211_cast")]; tensor var_13_promoted_to_fp16_14 = const()[name = tensor("op_13_promoted_to_fp16_14"), val = tensor(0x1p+1)]; tensor var_1322_cast = pow(x = hidden_states_211_cast, y = var_13_promoted_to_fp16_14)[name = tensor("op_1322_cast")]; tensor var_1323 = const()[name = tensor("op_1323"), val = tensor([-1])]; tensor variance_29_cast = reduce_mean(axes = var_1323, keep_dims = var_23, x = var_1322_cast)[name = tensor("variance_29_cast")]; tensor var_1325_to_fp16 = const()[name = tensor("op_1325_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_1326_cast = add(x = variance_29_cast, y = var_1325_to_fp16)[name = tensor("op_1326_cast")]; tensor var_1327_epsilon_0_to_fp16 = const()[name = tensor("op_1327_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_1327_cast = rsqrt(epsilon = var_1327_epsilon_0_to_fp16, x = var_1326_cast)[name = tensor("op_1327_cast")]; tensor hidden_states_215_cast = mul(x = hidden_states_211_cast, y = var_1327_cast)[name = tensor("hidden_states_215_cast")]; tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187004352)))]; tensor hidden_states_219_cast = mul(x = model_model_layers_7_input_layernorm_weight_to_fp16, y = hidden_states_215_cast)[name = tensor("hidden_states_219_cast")]; tensor model_model_layers_7_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187008512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189105728))), name = tensor("model_model_layers_7_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_49_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_219_cast)[name = tensor("linear_49_cast")]; tensor model_model_layers_7_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189105856))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189368064))), name = tensor("model_model_layers_7_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_50_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_7_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_219_cast)[name = tensor("linear_50_cast")]; tensor model_model_layers_7_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189368192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189630400))), name = tensor("model_model_layers_7_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_51_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_7_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_219_cast)[name = tensor("linear_51_cast")]; tensor var_1347 = const()[name = tensor("op_1347"), val = tensor([1, 128, 32, 64])]; tensor var_1348_cast = reshape(shape = var_1347, x = linear_49_cast)[name = tensor("op_1348_cast")]; tensor q_15_perm_0 = const()[name = tensor("q_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1350 = const()[name = tensor("op_1350"), val = tensor([1, 128, 4, 64])]; tensor var_1351_cast = reshape(shape = var_1350, x = linear_50_cast)[name = tensor("op_1351_cast")]; tensor key_states_45_perm_0 = const()[name = tensor("key_states_45_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1353 = const()[name = tensor("op_1353"), val = tensor([1, 128, 4, 64])]; tensor var_1354_cast = reshape(shape = var_1353, x = linear_51_cast)[name = tensor("op_1354_cast")]; tensor hidden_states_225_perm_0 = const()[name = tensor("hidden_states_225_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_74 = transpose(perm = q_15_perm_0, x = var_1348_cast)[name = tensor("transpose_74")]; tensor var_1380_cast = mul(x = transpose_74, y = cos_1_to_fp16_palettized)[name = tensor("op_1380_cast")]; tensor x1_29_begin_0 = const()[name = tensor("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = tensor("x1_29_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_29_end_mask_0 = const()[name = tensor("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29_cast = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = transpose_74)[name = tensor("x1_29_cast")]; tensor x2_29_begin_0 = const()[name = tensor("x2_29_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_29_end_0 = const()[name = tensor("x2_29_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_29_end_mask_0 = const()[name = tensor("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29_cast = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = transpose_74)[name = tensor("x2_29_cast")]; tensor const_130_promoted_to_fp16 = const()[name = tensor("const_130_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_1391_cast = mul(x = x2_29_cast, y = const_130_promoted_to_fp16)[name = tensor("op_1391_cast")]; tensor var_1393_interleave_0 = const()[name = tensor("op_1393_interleave_0"), val = tensor(false)]; tensor var_1393_cast = concat(axis = var_10, interleave = var_1393_interleave_0, values = (var_1391_cast, x1_29_cast))[name = tensor("op_1393_cast")]; tensor var_1394_cast = mul(x = var_1393_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_1394_cast")]; tensor query_states_31_cast = add(x = var_1380_cast, y = var_1394_cast)[name = tensor("query_states_31_cast")]; tensor transpose_73 = transpose(perm = key_states_45_perm_0, x = var_1351_cast)[name = tensor("transpose_73")]; tensor var_1396_cast = mul(x = transpose_73, y = cos_1_to_fp16_palettized)[name = tensor("op_1396_cast")]; tensor x1_31_begin_0 = const()[name = tensor("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = tensor("x1_31_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_31_end_mask_0 = const()[name = tensor("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_31_cast = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = transpose_73)[name = tensor("x1_31_cast")]; tensor x2_31_begin_0 = const()[name = tensor("x2_31_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_31_end_0 = const()[name = tensor("x2_31_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_31_end_mask_0 = const()[name = tensor("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31_cast = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = transpose_73)[name = tensor("x2_31_cast")]; tensor const_133_promoted_to_fp16 = const()[name = tensor("const_133_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_1407_cast = mul(x = x2_31_cast, y = const_133_promoted_to_fp16)[name = tensor("op_1407_cast")]; tensor var_1409_interleave_0 = const()[name = tensor("op_1409_interleave_0"), val = tensor(false)]; tensor var_1409_cast = concat(axis = var_10, interleave = var_1409_interleave_0, values = (var_1407_cast, x1_31_cast))[name = tensor("op_1409_cast")]; tensor var_1410_cast = mul(x = var_1409_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_1410_cast")]; tensor hidden_states_221_cast = add(x = var_1396_cast, y = var_1410_cast)[name = tensor("hidden_states_221_cast")]; tensor var_1419_axes_0 = const()[name = tensor("op_1419_axes_0"), val = tensor([2])]; tensor var_1419_cast = expand_dims(axes = var_1419_axes_0, x = hidden_states_221_cast)[name = tensor("op_1419_cast")]; tensor hidden_states_223_reps_0 = const()[name = tensor("hidden_states_223_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_223_cast = tile(reps = hidden_states_223_reps_0, x = var_1419_cast)[name = tensor("hidden_states_223_cast")]; tensor var_1427 = const()[name = tensor("op_1427"), val = tensor([1, 32, 128, 64])]; tensor key_states_47_cast = reshape(shape = var_1427, x = hidden_states_223_cast)[name = tensor("key_states_47_cast")]; tensor var_1436_axes_0 = const()[name = tensor("op_1436_axes_0"), val = tensor([2])]; tensor transpose_72 = transpose(perm = hidden_states_225_perm_0, x = var_1354_cast)[name = tensor("transpose_72")]; tensor var_1436_cast = expand_dims(axes = var_1436_axes_0, x = transpose_72)[name = tensor("op_1436_cast")]; tensor hidden_states_227_reps_0 = const()[name = tensor("hidden_states_227_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_227_cast = tile(reps = hidden_states_227_reps_0, x = var_1436_cast)[name = tensor("hidden_states_227_cast")]; tensor var_1444 = const()[name = tensor("op_1444"), val = tensor([1, 32, 128, 64])]; tensor value_states_31_cast = reshape(shape = var_1444, x = hidden_states_227_cast)[name = tensor("value_states_31_cast")]; tensor var_1446_perm_0 = const()[name = tensor("op_1446_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1447_transpose_x_0 = const()[name = tensor("op_1447_transpose_x_0"), val = tensor(false)]; tensor var_1447_transpose_y_0 = const()[name = tensor("op_1447_transpose_y_0"), val = tensor(false)]; tensor transpose_71 = transpose(perm = var_1446_perm_0, x = key_states_47_cast)[name = tensor("transpose_71")]; tensor var_1447_cast = matmul(transpose_x = var_1447_transpose_x_0, transpose_y = var_1447_transpose_y_0, x = query_states_31_cast, y = transpose_71)[name = tensor("op_1447_cast")]; tensor _inversed_attn_weights_29_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_29_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_29_cast = mul(x = var_1447_cast, y = _inversed_attn_weights_29_y_0_to_fp16)[name = tensor("_inversed_attn_weights_29_cast")]; tensor input_71_cast = add(x = _inversed_attn_weights_29_cast, y = attention_mask_cast)[name = tensor("input_71_cast")]; tensor var_1451_cast = softmax(axis = var_10, x = input_71_cast)[name = tensor("op_1451_cast")]; tensor attn_output_29_transpose_x_0 = const()[name = tensor("attn_output_29_transpose_x_0"), val = tensor(false)]; tensor attn_output_29_transpose_y_0 = const()[name = tensor("attn_output_29_transpose_y_0"), val = tensor(false)]; tensor attn_output_29_cast = matmul(transpose_x = attn_output_29_transpose_x_0, transpose_y = attn_output_29_transpose_y_0, x = var_1451_cast, y = value_states_31_cast)[name = tensor("attn_output_29_cast")]; tensor var_1454_perm_0 = const()[name = tensor("op_1454_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1456 = const()[name = tensor("op_1456"), val = tensor([1, 128, 2048])]; tensor transpose_70 = transpose(perm = var_1454_perm_0, x = attn_output_29_cast)[name = tensor("transpose_70")]; tensor input_73_cast = reshape(shape = var_1456, x = transpose_70)[name = tensor("input_73_cast")]; tensor model_model_layers_7_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(189630528))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191727744))), name = tensor("model_model_layers_7_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_52_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_to_fp16_palettized, x = input_73_cast)[name = tensor("linear_52_cast")]; tensor hidden_states_231_cast = add(x = hidden_states_211_cast, y = linear_52_cast)[name = tensor("hidden_states_231_cast")]; tensor var_13_promoted_to_fp16_15 = const()[name = tensor("op_13_promoted_to_fp16_15"), val = tensor(0x1p+1)]; tensor var_1463_cast = pow(x = hidden_states_231_cast, y = var_13_promoted_to_fp16_15)[name = tensor("op_1463_cast")]; tensor var_1464 = const()[name = tensor("op_1464"), val = tensor([-1])]; tensor variance_31_cast = reduce_mean(axes = var_1464, keep_dims = var_23, x = var_1463_cast)[name = tensor("variance_31_cast")]; tensor var_1466_to_fp16 = const()[name = tensor("op_1466_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_1467_cast = add(x = variance_31_cast, y = var_1466_to_fp16)[name = tensor("op_1467_cast")]; tensor var_1468_epsilon_0_to_fp16 = const()[name = tensor("op_1468_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_1468_cast = rsqrt(epsilon = var_1468_epsilon_0_to_fp16, x = var_1467_cast)[name = tensor("op_1468_cast")]; tensor hidden_states_235_cast = mul(x = hidden_states_231_cast, y = var_1468_cast)[name = tensor("hidden_states_235_cast")]; tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191727872)))]; tensor input_75_cast = mul(x = model_model_layers_7_post_attention_layernorm_weight_to_fp16, y = hidden_states_235_cast)[name = tensor("input_75_cast")]; tensor model_model_layers_7_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191732032))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197499264))), name = tensor("model_model_layers_7_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_53_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_7_mlp_gate_proj_weight_to_fp16_palettized, x = input_75_cast)[name = tensor("linear_53_cast")]; tensor var_1480_cast = silu(x = linear_53_cast)[name = tensor("op_1480_cast")]; tensor model_model_layers_7_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197499392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203266624))), name = tensor("model_model_layers_7_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_54_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_7_mlp_up_proj_weight_to_fp16_palettized, x = input_75_cast)[name = tensor("linear_54_cast")]; tensor input_79_cast = mul(x = var_1480_cast, y = linear_54_cast)[name = tensor("input_79_cast")]; tensor model_model_layers_7_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203266752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209033984))), name = tensor("model_model_layers_7_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_55_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_mlp_down_proj_weight_to_fp16_palettized, x = input_79_cast)[name = tensor("linear_55_cast")]; tensor hidden_states_241_cast = add(x = hidden_states_231_cast, y = linear_55_cast)[name = tensor("hidden_states_241_cast")]; tensor var_13_promoted_to_fp16_16 = const()[name = tensor("op_13_promoted_to_fp16_16"), val = tensor(0x1p+1)]; tensor var_1493_cast = pow(x = hidden_states_241_cast, y = var_13_promoted_to_fp16_16)[name = tensor("op_1493_cast")]; tensor var_1494 = const()[name = tensor("op_1494"), val = tensor([-1])]; tensor variance_33_cast = reduce_mean(axes = var_1494, keep_dims = var_23, x = var_1493_cast)[name = tensor("variance_33_cast")]; tensor var_1496_to_fp16 = const()[name = tensor("op_1496_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_1497_cast = add(x = variance_33_cast, y = var_1496_to_fp16)[name = tensor("op_1497_cast")]; tensor var_1498_epsilon_0_to_fp16 = const()[name = tensor("op_1498_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_1498_cast = rsqrt(epsilon = var_1498_epsilon_0_to_fp16, x = var_1497_cast)[name = tensor("op_1498_cast")]; tensor hidden_states_245_cast = mul(x = hidden_states_241_cast, y = var_1498_cast)[name = tensor("hidden_states_245_cast")]; tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209034112)))]; tensor hidden_states_249_cast = mul(x = model_model_layers_8_input_layernorm_weight_to_fp16, y = hidden_states_245_cast)[name = tensor("hidden_states_249_cast")]; tensor model_model_layers_8_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209038272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211135488))), name = tensor("model_model_layers_8_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_56_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_249_cast)[name = tensor("linear_56_cast")]; tensor model_model_layers_8_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211135616))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211397824))), name = tensor("model_model_layers_8_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_57_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_8_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_249_cast)[name = tensor("linear_57_cast")]; tensor model_model_layers_8_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211397952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211660160))), name = tensor("model_model_layers_8_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_58_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_8_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_249_cast)[name = tensor("linear_58_cast")]; tensor var_1518 = const()[name = tensor("op_1518"), val = tensor([1, 128, 32, 64])]; tensor var_1519_cast = reshape(shape = var_1518, x = linear_56_cast)[name = tensor("op_1519_cast")]; tensor q_17_perm_0 = const()[name = tensor("q_17_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1521 = const()[name = tensor("op_1521"), val = tensor([1, 128, 4, 64])]; tensor var_1522_cast = reshape(shape = var_1521, x = linear_57_cast)[name = tensor("op_1522_cast")]; tensor key_states_51_perm_0 = const()[name = tensor("key_states_51_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1524 = const()[name = tensor("op_1524"), val = tensor([1, 128, 4, 64])]; tensor var_1525_cast = reshape(shape = var_1524, x = linear_58_cast)[name = tensor("op_1525_cast")]; tensor hidden_states_255_perm_0 = const()[name = tensor("hidden_states_255_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_69 = transpose(perm = q_17_perm_0, x = var_1519_cast)[name = tensor("transpose_69")]; tensor var_1551_cast = mul(x = transpose_69, y = cos_1_to_fp16_palettized)[name = tensor("op_1551_cast")]; tensor x1_33_begin_0 = const()[name = tensor("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = tensor("x1_33_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_33_end_mask_0 = const()[name = tensor("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_33_cast = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = transpose_69)[name = tensor("x1_33_cast")]; tensor x2_33_begin_0 = const()[name = tensor("x2_33_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_33_end_0 = const()[name = tensor("x2_33_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_33_end_mask_0 = const()[name = tensor("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33_cast = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = transpose_69)[name = tensor("x2_33_cast")]; tensor const_147_promoted_to_fp16 = const()[name = tensor("const_147_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_1562_cast = mul(x = x2_33_cast, y = const_147_promoted_to_fp16)[name = tensor("op_1562_cast")]; tensor var_1564_interleave_0 = const()[name = tensor("op_1564_interleave_0"), val = tensor(false)]; tensor var_1564_cast = concat(axis = var_10, interleave = var_1564_interleave_0, values = (var_1562_cast, x1_33_cast))[name = tensor("op_1564_cast")]; tensor var_1565_cast = mul(x = var_1564_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_1565_cast")]; tensor query_states_35_cast = add(x = var_1551_cast, y = var_1565_cast)[name = tensor("query_states_35_cast")]; tensor transpose_68 = transpose(perm = key_states_51_perm_0, x = var_1522_cast)[name = tensor("transpose_68")]; tensor var_1567_cast = mul(x = transpose_68, y = cos_1_to_fp16_palettized)[name = tensor("op_1567_cast")]; tensor x1_35_begin_0 = const()[name = tensor("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = tensor("x1_35_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_35_end_mask_0 = const()[name = tensor("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_35_cast = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = transpose_68)[name = tensor("x1_35_cast")]; tensor x2_35_begin_0 = const()[name = tensor("x2_35_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_35_end_0 = const()[name = tensor("x2_35_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_35_end_mask_0 = const()[name = tensor("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35_cast = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = transpose_68)[name = tensor("x2_35_cast")]; tensor const_150_promoted_to_fp16 = const()[name = tensor("const_150_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_1578_cast = mul(x = x2_35_cast, y = const_150_promoted_to_fp16)[name = tensor("op_1578_cast")]; tensor var_1580_interleave_0 = const()[name = tensor("op_1580_interleave_0"), val = tensor(false)]; tensor var_1580_cast = concat(axis = var_10, interleave = var_1580_interleave_0, values = (var_1578_cast, x1_35_cast))[name = tensor("op_1580_cast")]; tensor var_1581_cast = mul(x = var_1580_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_1581_cast")]; tensor hidden_states_251_cast = add(x = var_1567_cast, y = var_1581_cast)[name = tensor("hidden_states_251_cast")]; tensor var_1590_axes_0 = const()[name = tensor("op_1590_axes_0"), val = tensor([2])]; tensor var_1590_cast = expand_dims(axes = var_1590_axes_0, x = hidden_states_251_cast)[name = tensor("op_1590_cast")]; tensor hidden_states_253_reps_0 = const()[name = tensor("hidden_states_253_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_253_cast = tile(reps = hidden_states_253_reps_0, x = var_1590_cast)[name = tensor("hidden_states_253_cast")]; tensor var_1598 = const()[name = tensor("op_1598"), val = tensor([1, 32, 128, 64])]; tensor key_states_53_cast = reshape(shape = var_1598, x = hidden_states_253_cast)[name = tensor("key_states_53_cast")]; tensor var_1607_axes_0 = const()[name = tensor("op_1607_axes_0"), val = tensor([2])]; tensor transpose_67 = transpose(perm = hidden_states_255_perm_0, x = var_1525_cast)[name = tensor("transpose_67")]; tensor var_1607_cast = expand_dims(axes = var_1607_axes_0, x = transpose_67)[name = tensor("op_1607_cast")]; tensor hidden_states_257_reps_0 = const()[name = tensor("hidden_states_257_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_257_cast = tile(reps = hidden_states_257_reps_0, x = var_1607_cast)[name = tensor("hidden_states_257_cast")]; tensor var_1615 = const()[name = tensor("op_1615"), val = tensor([1, 32, 128, 64])]; tensor value_states_35_cast = reshape(shape = var_1615, x = hidden_states_257_cast)[name = tensor("value_states_35_cast")]; tensor var_1617_perm_0 = const()[name = tensor("op_1617_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1618_transpose_x_0 = const()[name = tensor("op_1618_transpose_x_0"), val = tensor(false)]; tensor var_1618_transpose_y_0 = const()[name = tensor("op_1618_transpose_y_0"), val = tensor(false)]; tensor transpose_66 = transpose(perm = var_1617_perm_0, x = key_states_53_cast)[name = tensor("transpose_66")]; tensor var_1618_cast = matmul(transpose_x = var_1618_transpose_x_0, transpose_y = var_1618_transpose_y_0, x = query_states_35_cast, y = transpose_66)[name = tensor("op_1618_cast")]; tensor _inversed_attn_weights_33_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_33_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_33_cast = mul(x = var_1618_cast, y = _inversed_attn_weights_33_y_0_to_fp16)[name = tensor("_inversed_attn_weights_33_cast")]; tensor input_81_cast = add(x = _inversed_attn_weights_33_cast, y = attention_mask_cast)[name = tensor("input_81_cast")]; tensor var_1622_cast = softmax(axis = var_10, x = input_81_cast)[name = tensor("op_1622_cast")]; tensor attn_output_33_transpose_x_0 = const()[name = tensor("attn_output_33_transpose_x_0"), val = tensor(false)]; tensor attn_output_33_transpose_y_0 = const()[name = tensor("attn_output_33_transpose_y_0"), val = tensor(false)]; tensor attn_output_33_cast = matmul(transpose_x = attn_output_33_transpose_x_0, transpose_y = attn_output_33_transpose_y_0, x = var_1622_cast, y = value_states_35_cast)[name = tensor("attn_output_33_cast")]; tensor var_1625_perm_0 = const()[name = tensor("op_1625_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1627 = const()[name = tensor("op_1627"), val = tensor([1, 128, 2048])]; tensor transpose_65 = transpose(perm = var_1625_perm_0, x = attn_output_33_cast)[name = tensor("transpose_65")]; tensor input_83_cast = reshape(shape = var_1627, x = transpose_65)[name = tensor("input_83_cast")]; tensor model_model_layers_8_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211660288))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213757504))), name = tensor("model_model_layers_8_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_59_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_o_proj_weight_to_fp16_palettized, x = input_83_cast)[name = tensor("linear_59_cast")]; tensor hidden_states_261_cast = add(x = hidden_states_241_cast, y = linear_59_cast)[name = tensor("hidden_states_261_cast")]; tensor var_13_promoted_to_fp16_17 = const()[name = tensor("op_13_promoted_to_fp16_17"), val = tensor(0x1p+1)]; tensor var_1634_cast = pow(x = hidden_states_261_cast, y = var_13_promoted_to_fp16_17)[name = tensor("op_1634_cast")]; tensor var_1635 = const()[name = tensor("op_1635"), val = tensor([-1])]; tensor variance_35_cast = reduce_mean(axes = var_1635, keep_dims = var_23, x = var_1634_cast)[name = tensor("variance_35_cast")]; tensor var_1637_to_fp16 = const()[name = tensor("op_1637_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_1638_cast = add(x = variance_35_cast, y = var_1637_to_fp16)[name = tensor("op_1638_cast")]; tensor var_1639_epsilon_0_to_fp16 = const()[name = tensor("op_1639_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_1639_cast = rsqrt(epsilon = var_1639_epsilon_0_to_fp16, x = var_1638_cast)[name = tensor("op_1639_cast")]; tensor hidden_states_265_cast = mul(x = hidden_states_261_cast, y = var_1639_cast)[name = tensor("hidden_states_265_cast")]; tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213757632)))]; tensor input_85_cast = mul(x = model_model_layers_8_post_attention_layernorm_weight_to_fp16, y = hidden_states_265_cast)[name = tensor("input_85_cast")]; tensor model_model_layers_8_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(213761792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219529024))), name = tensor("model_model_layers_8_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_60_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_8_mlp_gate_proj_weight_to_fp16_palettized, x = input_85_cast)[name = tensor("linear_60_cast")]; tensor var_1651_cast = silu(x = linear_60_cast)[name = tensor("op_1651_cast")]; tensor model_model_layers_8_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(219529152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225296384))), name = tensor("model_model_layers_8_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_61_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_8_mlp_up_proj_weight_to_fp16_palettized, x = input_85_cast)[name = tensor("linear_61_cast")]; tensor input_89_cast = mul(x = var_1651_cast, y = linear_61_cast)[name = tensor("input_89_cast")]; tensor model_model_layers_8_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225296512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(231063744))), name = tensor("model_model_layers_8_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_62_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_mlp_down_proj_weight_to_fp16_palettized, x = input_89_cast)[name = tensor("linear_62_cast")]; tensor hidden_states_271_cast = add(x = hidden_states_261_cast, y = linear_62_cast)[name = tensor("hidden_states_271_cast")]; tensor var_13_promoted_to_fp16_18 = const()[name = tensor("op_13_promoted_to_fp16_18"), val = tensor(0x1p+1)]; tensor var_1664_cast = pow(x = hidden_states_271_cast, y = var_13_promoted_to_fp16_18)[name = tensor("op_1664_cast")]; tensor var_1665 = const()[name = tensor("op_1665"), val = tensor([-1])]; tensor variance_37_cast = reduce_mean(axes = var_1665, keep_dims = var_23, x = var_1664_cast)[name = tensor("variance_37_cast")]; tensor var_1667_to_fp16 = const()[name = tensor("op_1667_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_1668_cast = add(x = variance_37_cast, y = var_1667_to_fp16)[name = tensor("op_1668_cast")]; tensor var_1669_epsilon_0_to_fp16 = const()[name = tensor("op_1669_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_1669_cast = rsqrt(epsilon = var_1669_epsilon_0_to_fp16, x = var_1668_cast)[name = tensor("op_1669_cast")]; tensor hidden_states_275_cast = mul(x = hidden_states_271_cast, y = var_1669_cast)[name = tensor("hidden_states_275_cast")]; tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(231063872)))]; tensor hidden_states_279_cast = mul(x = model_model_layers_9_input_layernorm_weight_to_fp16, y = hidden_states_275_cast)[name = tensor("hidden_states_279_cast")]; tensor model_model_layers_9_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(231068032))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233165248))), name = tensor("model_model_layers_9_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_63_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_279_cast)[name = tensor("linear_63_cast")]; tensor model_model_layers_9_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233165376))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233427584))), name = tensor("model_model_layers_9_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_64_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_9_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_279_cast)[name = tensor("linear_64_cast")]; tensor model_model_layers_9_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233427712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233689920))), name = tensor("model_model_layers_9_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_65_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_9_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_279_cast)[name = tensor("linear_65_cast")]; tensor var_1689 = const()[name = tensor("op_1689"), val = tensor([1, 128, 32, 64])]; tensor var_1690_cast = reshape(shape = var_1689, x = linear_63_cast)[name = tensor("op_1690_cast")]; tensor q_19_perm_0 = const()[name = tensor("q_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1692 = const()[name = tensor("op_1692"), val = tensor([1, 128, 4, 64])]; tensor var_1693_cast = reshape(shape = var_1692, x = linear_64_cast)[name = tensor("op_1693_cast")]; tensor key_states_57_perm_0 = const()[name = tensor("key_states_57_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1695 = const()[name = tensor("op_1695"), val = tensor([1, 128, 4, 64])]; tensor var_1696_cast = reshape(shape = var_1695, x = linear_65_cast)[name = tensor("op_1696_cast")]; tensor hidden_states_285_perm_0 = const()[name = tensor("hidden_states_285_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_64 = transpose(perm = q_19_perm_0, x = var_1690_cast)[name = tensor("transpose_64")]; tensor var_1722_cast = mul(x = transpose_64, y = cos_1_to_fp16_palettized)[name = tensor("op_1722_cast")]; tensor x1_37_begin_0 = const()[name = tensor("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = tensor("x1_37_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_37_end_mask_0 = const()[name = tensor("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_37_cast = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = transpose_64)[name = tensor("x1_37_cast")]; tensor x2_37_begin_0 = const()[name = tensor("x2_37_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_37_end_0 = const()[name = tensor("x2_37_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_37_end_mask_0 = const()[name = tensor("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37_cast = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = transpose_64)[name = tensor("x2_37_cast")]; tensor const_164_promoted_to_fp16 = const()[name = tensor("const_164_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_1733_cast = mul(x = x2_37_cast, y = const_164_promoted_to_fp16)[name = tensor("op_1733_cast")]; tensor var_1735_interleave_0 = const()[name = tensor("op_1735_interleave_0"), val = tensor(false)]; tensor var_1735_cast = concat(axis = var_10, interleave = var_1735_interleave_0, values = (var_1733_cast, x1_37_cast))[name = tensor("op_1735_cast")]; tensor var_1736_cast = mul(x = var_1735_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_1736_cast")]; tensor query_states_39_cast = add(x = var_1722_cast, y = var_1736_cast)[name = tensor("query_states_39_cast")]; tensor transpose_63 = transpose(perm = key_states_57_perm_0, x = var_1693_cast)[name = tensor("transpose_63")]; tensor var_1738_cast = mul(x = transpose_63, y = cos_1_to_fp16_palettized)[name = tensor("op_1738_cast")]; tensor x1_39_begin_0 = const()[name = tensor("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = tensor("x1_39_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_39_end_mask_0 = const()[name = tensor("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_39_cast = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = transpose_63)[name = tensor("x1_39_cast")]; tensor x2_39_begin_0 = const()[name = tensor("x2_39_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_39_end_0 = const()[name = tensor("x2_39_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_39_end_mask_0 = const()[name = tensor("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39_cast = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = transpose_63)[name = tensor("x2_39_cast")]; tensor const_167_promoted_to_fp16 = const()[name = tensor("const_167_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_1749_cast = mul(x = x2_39_cast, y = const_167_promoted_to_fp16)[name = tensor("op_1749_cast")]; tensor var_1751_interleave_0 = const()[name = tensor("op_1751_interleave_0"), val = tensor(false)]; tensor var_1751_cast = concat(axis = var_10, interleave = var_1751_interleave_0, values = (var_1749_cast, x1_39_cast))[name = tensor("op_1751_cast")]; tensor var_1752_cast = mul(x = var_1751_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_1752_cast")]; tensor hidden_states_281_cast = add(x = var_1738_cast, y = var_1752_cast)[name = tensor("hidden_states_281_cast")]; tensor var_1761_axes_0 = const()[name = tensor("op_1761_axes_0"), val = tensor([2])]; tensor var_1761_cast = expand_dims(axes = var_1761_axes_0, x = hidden_states_281_cast)[name = tensor("op_1761_cast")]; tensor hidden_states_283_reps_0 = const()[name = tensor("hidden_states_283_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_283_cast = tile(reps = hidden_states_283_reps_0, x = var_1761_cast)[name = tensor("hidden_states_283_cast")]; tensor var_1769 = const()[name = tensor("op_1769"), val = tensor([1, 32, 128, 64])]; tensor key_states_59_cast = reshape(shape = var_1769, x = hidden_states_283_cast)[name = tensor("key_states_59_cast")]; tensor var_1778_axes_0 = const()[name = tensor("op_1778_axes_0"), val = tensor([2])]; tensor transpose_62 = transpose(perm = hidden_states_285_perm_0, x = var_1696_cast)[name = tensor("transpose_62")]; tensor var_1778_cast = expand_dims(axes = var_1778_axes_0, x = transpose_62)[name = tensor("op_1778_cast")]; tensor hidden_states_287_reps_0 = const()[name = tensor("hidden_states_287_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_287_cast = tile(reps = hidden_states_287_reps_0, x = var_1778_cast)[name = tensor("hidden_states_287_cast")]; tensor var_1786 = const()[name = tensor("op_1786"), val = tensor([1, 32, 128, 64])]; tensor value_states_39_cast = reshape(shape = var_1786, x = hidden_states_287_cast)[name = tensor("value_states_39_cast")]; tensor var_1788_perm_0 = const()[name = tensor("op_1788_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1789_transpose_x_0 = const()[name = tensor("op_1789_transpose_x_0"), val = tensor(false)]; tensor var_1789_transpose_y_0 = const()[name = tensor("op_1789_transpose_y_0"), val = tensor(false)]; tensor transpose_61 = transpose(perm = var_1788_perm_0, x = key_states_59_cast)[name = tensor("transpose_61")]; tensor var_1789_cast = matmul(transpose_x = var_1789_transpose_x_0, transpose_y = var_1789_transpose_y_0, x = query_states_39_cast, y = transpose_61)[name = tensor("op_1789_cast")]; tensor _inversed_attn_weights_37_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_37_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_37_cast = mul(x = var_1789_cast, y = _inversed_attn_weights_37_y_0_to_fp16)[name = tensor("_inversed_attn_weights_37_cast")]; tensor input_91_cast = add(x = _inversed_attn_weights_37_cast, y = attention_mask_cast)[name = tensor("input_91_cast")]; tensor var_1793_cast = softmax(axis = var_10, x = input_91_cast)[name = tensor("op_1793_cast")]; tensor attn_output_37_transpose_x_0 = const()[name = tensor("attn_output_37_transpose_x_0"), val = tensor(false)]; tensor attn_output_37_transpose_y_0 = const()[name = tensor("attn_output_37_transpose_y_0"), val = tensor(false)]; tensor attn_output_37_cast = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = var_1793_cast, y = value_states_39_cast)[name = tensor("attn_output_37_cast")]; tensor var_1796_perm_0 = const()[name = tensor("op_1796_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1798 = const()[name = tensor("op_1798"), val = tensor([1, 128, 2048])]; tensor transpose_60 = transpose(perm = var_1796_perm_0, x = attn_output_37_cast)[name = tensor("transpose_60")]; tensor input_93_cast = reshape(shape = var_1798, x = transpose_60)[name = tensor("input_93_cast")]; tensor model_model_layers_9_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233690048))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235787264))), name = tensor("model_model_layers_9_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_66_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_o_proj_weight_to_fp16_palettized, x = input_93_cast)[name = tensor("linear_66_cast")]; tensor hidden_states_291_cast = add(x = hidden_states_271_cast, y = linear_66_cast)[name = tensor("hidden_states_291_cast")]; tensor var_13_promoted_to_fp16_19 = const()[name = tensor("op_13_promoted_to_fp16_19"), val = tensor(0x1p+1)]; tensor var_1805_cast = pow(x = hidden_states_291_cast, y = var_13_promoted_to_fp16_19)[name = tensor("op_1805_cast")]; tensor var_1806 = const()[name = tensor("op_1806"), val = tensor([-1])]; tensor variance_39_cast = reduce_mean(axes = var_1806, keep_dims = var_23, x = var_1805_cast)[name = tensor("variance_39_cast")]; tensor var_1808_to_fp16 = const()[name = tensor("op_1808_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_1809_cast = add(x = variance_39_cast, y = var_1808_to_fp16)[name = tensor("op_1809_cast")]; tensor var_1810_epsilon_0_to_fp16 = const()[name = tensor("op_1810_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_1810_cast = rsqrt(epsilon = var_1810_epsilon_0_to_fp16, x = var_1809_cast)[name = tensor("op_1810_cast")]; tensor hidden_states_295_cast = mul(x = hidden_states_291_cast, y = var_1810_cast)[name = tensor("hidden_states_295_cast")]; tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235787392)))]; tensor input_95_cast = mul(x = model_model_layers_9_post_attention_layernorm_weight_to_fp16, y = hidden_states_295_cast)[name = tensor("input_95_cast")]; tensor model_model_layers_9_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235791552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241558784))), name = tensor("model_model_layers_9_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_67_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_9_mlp_gate_proj_weight_to_fp16_palettized, x = input_95_cast)[name = tensor("linear_67_cast")]; tensor var_1822_cast = silu(x = linear_67_cast)[name = tensor("op_1822_cast")]; tensor model_model_layers_9_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241558912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(247326144))), name = tensor("model_model_layers_9_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_68_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_9_mlp_up_proj_weight_to_fp16_palettized, x = input_95_cast)[name = tensor("linear_68_cast")]; tensor input_99_cast = mul(x = var_1822_cast, y = linear_68_cast)[name = tensor("input_99_cast")]; tensor model_model_layers_9_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(247326272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(253093504))), name = tensor("model_model_layers_9_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_69_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_mlp_down_proj_weight_to_fp16_palettized, x = input_99_cast)[name = tensor("linear_69_cast")]; tensor hidden_states_301_cast = add(x = hidden_states_291_cast, y = linear_69_cast)[name = tensor("hidden_states_301_cast")]; tensor var_13_promoted_to_fp16_20 = const()[name = tensor("op_13_promoted_to_fp16_20"), val = tensor(0x1p+1)]; tensor var_1835_cast = pow(x = hidden_states_301_cast, y = var_13_promoted_to_fp16_20)[name = tensor("op_1835_cast")]; tensor var_1836 = const()[name = tensor("op_1836"), val = tensor([-1])]; tensor variance_41_cast = reduce_mean(axes = var_1836, keep_dims = var_23, x = var_1835_cast)[name = tensor("variance_41_cast")]; tensor var_1838_to_fp16 = const()[name = tensor("op_1838_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_1839_cast = add(x = variance_41_cast, y = var_1838_to_fp16)[name = tensor("op_1839_cast")]; tensor var_1840_epsilon_0_to_fp16 = const()[name = tensor("op_1840_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_1840_cast = rsqrt(epsilon = var_1840_epsilon_0_to_fp16, x = var_1839_cast)[name = tensor("op_1840_cast")]; tensor hidden_states_305_cast = mul(x = hidden_states_301_cast, y = var_1840_cast)[name = tensor("hidden_states_305_cast")]; tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(253093632)))]; tensor hidden_states_309_cast = mul(x = model_model_layers_10_input_layernorm_weight_to_fp16, y = hidden_states_305_cast)[name = tensor("hidden_states_309_cast")]; tensor model_model_layers_10_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(253097792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255195008))), name = tensor("model_model_layers_10_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_70_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_309_cast)[name = tensor("linear_70_cast")]; tensor model_model_layers_10_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255195136))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255457344))), name = tensor("model_model_layers_10_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_71_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_10_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_309_cast)[name = tensor("linear_71_cast")]; tensor model_model_layers_10_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255457472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255719680))), name = tensor("model_model_layers_10_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_72_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_10_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_309_cast)[name = tensor("linear_72_cast")]; tensor var_1860 = const()[name = tensor("op_1860"), val = tensor([1, 128, 32, 64])]; tensor var_1861_cast = reshape(shape = var_1860, x = linear_70_cast)[name = tensor("op_1861_cast")]; tensor q_21_perm_0 = const()[name = tensor("q_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1863 = const()[name = tensor("op_1863"), val = tensor([1, 128, 4, 64])]; tensor var_1864_cast = reshape(shape = var_1863, x = linear_71_cast)[name = tensor("op_1864_cast")]; tensor key_states_63_perm_0 = const()[name = tensor("key_states_63_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1866 = const()[name = tensor("op_1866"), val = tensor([1, 128, 4, 64])]; tensor var_1867_cast = reshape(shape = var_1866, x = linear_72_cast)[name = tensor("op_1867_cast")]; tensor hidden_states_315_perm_0 = const()[name = tensor("hidden_states_315_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_59 = transpose(perm = q_21_perm_0, x = var_1861_cast)[name = tensor("transpose_59")]; tensor var_1893_cast = mul(x = transpose_59, y = cos_1_to_fp16_palettized)[name = tensor("op_1893_cast")]; tensor x1_41_begin_0 = const()[name = tensor("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = tensor("x1_41_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_41_end_mask_0 = const()[name = tensor("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_41_cast = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = transpose_59)[name = tensor("x1_41_cast")]; tensor x2_41_begin_0 = const()[name = tensor("x2_41_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_41_end_0 = const()[name = tensor("x2_41_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_41_end_mask_0 = const()[name = tensor("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41_cast = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = transpose_59)[name = tensor("x2_41_cast")]; tensor const_181_promoted_to_fp16 = const()[name = tensor("const_181_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_1904_cast = mul(x = x2_41_cast, y = const_181_promoted_to_fp16)[name = tensor("op_1904_cast")]; tensor var_1906_interleave_0 = const()[name = tensor("op_1906_interleave_0"), val = tensor(false)]; tensor var_1906_cast = concat(axis = var_10, interleave = var_1906_interleave_0, values = (var_1904_cast, x1_41_cast))[name = tensor("op_1906_cast")]; tensor var_1907_cast = mul(x = var_1906_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_1907_cast")]; tensor query_states_43_cast = add(x = var_1893_cast, y = var_1907_cast)[name = tensor("query_states_43_cast")]; tensor transpose_58 = transpose(perm = key_states_63_perm_0, x = var_1864_cast)[name = tensor("transpose_58")]; tensor var_1909_cast = mul(x = transpose_58, y = cos_1_to_fp16_palettized)[name = tensor("op_1909_cast")]; tensor x1_43_begin_0 = const()[name = tensor("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = tensor("x1_43_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_43_end_mask_0 = const()[name = tensor("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_43_cast = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = transpose_58)[name = tensor("x1_43_cast")]; tensor x2_43_begin_0 = const()[name = tensor("x2_43_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_43_end_0 = const()[name = tensor("x2_43_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_43_end_mask_0 = const()[name = tensor("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43_cast = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = transpose_58)[name = tensor("x2_43_cast")]; tensor const_184_promoted_to_fp16 = const()[name = tensor("const_184_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_1920_cast = mul(x = x2_43_cast, y = const_184_promoted_to_fp16)[name = tensor("op_1920_cast")]; tensor var_1922_interleave_0 = const()[name = tensor("op_1922_interleave_0"), val = tensor(false)]; tensor var_1922_cast = concat(axis = var_10, interleave = var_1922_interleave_0, values = (var_1920_cast, x1_43_cast))[name = tensor("op_1922_cast")]; tensor var_1923_cast = mul(x = var_1922_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_1923_cast")]; tensor hidden_states_311_cast = add(x = var_1909_cast, y = var_1923_cast)[name = tensor("hidden_states_311_cast")]; tensor var_1932_axes_0 = const()[name = tensor("op_1932_axes_0"), val = tensor([2])]; tensor var_1932_cast = expand_dims(axes = var_1932_axes_0, x = hidden_states_311_cast)[name = tensor("op_1932_cast")]; tensor hidden_states_313_reps_0 = const()[name = tensor("hidden_states_313_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_313_cast = tile(reps = hidden_states_313_reps_0, x = var_1932_cast)[name = tensor("hidden_states_313_cast")]; tensor var_1940 = const()[name = tensor("op_1940"), val = tensor([1, 32, 128, 64])]; tensor key_states_65_cast = reshape(shape = var_1940, x = hidden_states_313_cast)[name = tensor("key_states_65_cast")]; tensor var_1949_axes_0 = const()[name = tensor("op_1949_axes_0"), val = tensor([2])]; tensor transpose_57 = transpose(perm = hidden_states_315_perm_0, x = var_1867_cast)[name = tensor("transpose_57")]; tensor var_1949_cast = expand_dims(axes = var_1949_axes_0, x = transpose_57)[name = tensor("op_1949_cast")]; tensor hidden_states_317_reps_0 = const()[name = tensor("hidden_states_317_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_317_cast = tile(reps = hidden_states_317_reps_0, x = var_1949_cast)[name = tensor("hidden_states_317_cast")]; tensor var_1957 = const()[name = tensor("op_1957"), val = tensor([1, 32, 128, 64])]; tensor value_states_43_cast = reshape(shape = var_1957, x = hidden_states_317_cast)[name = tensor("value_states_43_cast")]; tensor var_1959_perm_0 = const()[name = tensor("op_1959_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_1960_transpose_x_0 = const()[name = tensor("op_1960_transpose_x_0"), val = tensor(false)]; tensor var_1960_transpose_y_0 = const()[name = tensor("op_1960_transpose_y_0"), val = tensor(false)]; tensor transpose_56 = transpose(perm = var_1959_perm_0, x = key_states_65_cast)[name = tensor("transpose_56")]; tensor var_1960_cast = matmul(transpose_x = var_1960_transpose_x_0, transpose_y = var_1960_transpose_y_0, x = query_states_43_cast, y = transpose_56)[name = tensor("op_1960_cast")]; tensor _inversed_attn_weights_41_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_41_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_41_cast = mul(x = var_1960_cast, y = _inversed_attn_weights_41_y_0_to_fp16)[name = tensor("_inversed_attn_weights_41_cast")]; tensor input_101_cast = add(x = _inversed_attn_weights_41_cast, y = attention_mask_cast)[name = tensor("input_101_cast")]; tensor var_1964_cast = softmax(axis = var_10, x = input_101_cast)[name = tensor("op_1964_cast")]; tensor attn_output_41_transpose_x_0 = const()[name = tensor("attn_output_41_transpose_x_0"), val = tensor(false)]; tensor attn_output_41_transpose_y_0 = const()[name = tensor("attn_output_41_transpose_y_0"), val = tensor(false)]; tensor attn_output_41_cast = matmul(transpose_x = attn_output_41_transpose_x_0, transpose_y = attn_output_41_transpose_y_0, x = var_1964_cast, y = value_states_43_cast)[name = tensor("attn_output_41_cast")]; tensor var_1967_perm_0 = const()[name = tensor("op_1967_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_1969 = const()[name = tensor("op_1969"), val = tensor([1, 128, 2048])]; tensor transpose_55 = transpose(perm = var_1967_perm_0, x = attn_output_41_cast)[name = tensor("transpose_55")]; tensor input_103_cast = reshape(shape = var_1969, x = transpose_55)[name = tensor("input_103_cast")]; tensor model_model_layers_10_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(255719808))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(257817024))), name = tensor("model_model_layers_10_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_73_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_o_proj_weight_to_fp16_palettized, x = input_103_cast)[name = tensor("linear_73_cast")]; tensor hidden_states_321_cast = add(x = hidden_states_301_cast, y = linear_73_cast)[name = tensor("hidden_states_321_cast")]; tensor var_13_promoted_to_fp16_21 = const()[name = tensor("op_13_promoted_to_fp16_21"), val = tensor(0x1p+1)]; tensor var_1976_cast = pow(x = hidden_states_321_cast, y = var_13_promoted_to_fp16_21)[name = tensor("op_1976_cast")]; tensor var_1977 = const()[name = tensor("op_1977"), val = tensor([-1])]; tensor variance_43_cast = reduce_mean(axes = var_1977, keep_dims = var_23, x = var_1976_cast)[name = tensor("variance_43_cast")]; tensor var_1979_to_fp16 = const()[name = tensor("op_1979_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_1980_cast = add(x = variance_43_cast, y = var_1979_to_fp16)[name = tensor("op_1980_cast")]; tensor var_1981_epsilon_0_to_fp16 = const()[name = tensor("op_1981_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_1981_cast = rsqrt(epsilon = var_1981_epsilon_0_to_fp16, x = var_1980_cast)[name = tensor("op_1981_cast")]; tensor hidden_states_325_cast = mul(x = hidden_states_321_cast, y = var_1981_cast)[name = tensor("hidden_states_325_cast")]; tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(257817152)))]; tensor input_105_cast = mul(x = model_model_layers_10_post_attention_layernorm_weight_to_fp16, y = hidden_states_325_cast)[name = tensor("input_105_cast")]; tensor model_model_layers_10_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(257821312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263588544))), name = tensor("model_model_layers_10_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_74_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_10_mlp_gate_proj_weight_to_fp16_palettized, x = input_105_cast)[name = tensor("linear_74_cast")]; tensor var_1993_cast = silu(x = linear_74_cast)[name = tensor("op_1993_cast")]; tensor model_model_layers_10_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263588672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(269355904))), name = tensor("model_model_layers_10_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_75_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_10_mlp_up_proj_weight_to_fp16_palettized, x = input_105_cast)[name = tensor("linear_75_cast")]; tensor input_109_cast = mul(x = var_1993_cast, y = linear_75_cast)[name = tensor("input_109_cast")]; tensor model_model_layers_10_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(269356032))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275123264))), name = tensor("model_model_layers_10_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_76_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_mlp_down_proj_weight_to_fp16_palettized, x = input_109_cast)[name = tensor("linear_76_cast")]; tensor hidden_states_331_cast = add(x = hidden_states_321_cast, y = linear_76_cast)[name = tensor("hidden_states_331_cast")]; tensor var_13_promoted_to_fp16_22 = const()[name = tensor("op_13_promoted_to_fp16_22"), val = tensor(0x1p+1)]; tensor var_2006_cast = pow(x = hidden_states_331_cast, y = var_13_promoted_to_fp16_22)[name = tensor("op_2006_cast")]; tensor var_2007 = const()[name = tensor("op_2007"), val = tensor([-1])]; tensor variance_45_cast = reduce_mean(axes = var_2007, keep_dims = var_23, x = var_2006_cast)[name = tensor("variance_45_cast")]; tensor var_2009_to_fp16 = const()[name = tensor("op_2009_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_2010_cast = add(x = variance_45_cast, y = var_2009_to_fp16)[name = tensor("op_2010_cast")]; tensor var_2011_epsilon_0_to_fp16 = const()[name = tensor("op_2011_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_2011_cast = rsqrt(epsilon = var_2011_epsilon_0_to_fp16, x = var_2010_cast)[name = tensor("op_2011_cast")]; tensor hidden_states_335_cast = mul(x = hidden_states_331_cast, y = var_2011_cast)[name = tensor("hidden_states_335_cast")]; tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275123392)))]; tensor hidden_states_339_cast = mul(x = model_model_layers_11_input_layernorm_weight_to_fp16, y = hidden_states_335_cast)[name = tensor("hidden_states_339_cast")]; tensor model_model_layers_11_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275127552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(277224768))), name = tensor("model_model_layers_11_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_77_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_339_cast)[name = tensor("linear_77_cast")]; tensor model_model_layers_11_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(277224896))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(277487104))), name = tensor("model_model_layers_11_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_78_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_11_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_339_cast)[name = tensor("linear_78_cast")]; tensor model_model_layers_11_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(277487232))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(277749440))), name = tensor("model_model_layers_11_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_79_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_11_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_339_cast)[name = tensor("linear_79_cast")]; tensor var_2031 = const()[name = tensor("op_2031"), val = tensor([1, 128, 32, 64])]; tensor var_2032_cast = reshape(shape = var_2031, x = linear_77_cast)[name = tensor("op_2032_cast")]; tensor q_23_perm_0 = const()[name = tensor("q_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2034 = const()[name = tensor("op_2034"), val = tensor([1, 128, 4, 64])]; tensor var_2035_cast = reshape(shape = var_2034, x = linear_78_cast)[name = tensor("op_2035_cast")]; tensor key_states_69_perm_0 = const()[name = tensor("key_states_69_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2037 = const()[name = tensor("op_2037"), val = tensor([1, 128, 4, 64])]; tensor var_2038_cast = reshape(shape = var_2037, x = linear_79_cast)[name = tensor("op_2038_cast")]; tensor hidden_states_345_perm_0 = const()[name = tensor("hidden_states_345_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_54 = transpose(perm = q_23_perm_0, x = var_2032_cast)[name = tensor("transpose_54")]; tensor var_2064_cast = mul(x = transpose_54, y = cos_1_to_fp16_palettized)[name = tensor("op_2064_cast")]; tensor x1_45_begin_0 = const()[name = tensor("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = tensor("x1_45_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_45_end_mask_0 = const()[name = tensor("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_45_cast = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = transpose_54)[name = tensor("x1_45_cast")]; tensor x2_45_begin_0 = const()[name = tensor("x2_45_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_45_end_0 = const()[name = tensor("x2_45_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_45_end_mask_0 = const()[name = tensor("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45_cast = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = transpose_54)[name = tensor("x2_45_cast")]; tensor const_198_promoted_to_fp16 = const()[name = tensor("const_198_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_2075_cast = mul(x = x2_45_cast, y = const_198_promoted_to_fp16)[name = tensor("op_2075_cast")]; tensor var_2077_interleave_0 = const()[name = tensor("op_2077_interleave_0"), val = tensor(false)]; tensor var_2077_cast = concat(axis = var_10, interleave = var_2077_interleave_0, values = (var_2075_cast, x1_45_cast))[name = tensor("op_2077_cast")]; tensor var_2078_cast = mul(x = var_2077_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_2078_cast")]; tensor query_states_47_cast = add(x = var_2064_cast, y = var_2078_cast)[name = tensor("query_states_47_cast")]; tensor transpose_53 = transpose(perm = key_states_69_perm_0, x = var_2035_cast)[name = tensor("transpose_53")]; tensor var_2080_cast = mul(x = transpose_53, y = cos_1_to_fp16_palettized)[name = tensor("op_2080_cast")]; tensor x1_47_begin_0 = const()[name = tensor("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = tensor("x1_47_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_47_end_mask_0 = const()[name = tensor("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_47_cast = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = transpose_53)[name = tensor("x1_47_cast")]; tensor x2_47_begin_0 = const()[name = tensor("x2_47_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_47_end_0 = const()[name = tensor("x2_47_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_47_end_mask_0 = const()[name = tensor("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47_cast = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = transpose_53)[name = tensor("x2_47_cast")]; tensor const_201_promoted_to_fp16 = const()[name = tensor("const_201_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_2091_cast = mul(x = x2_47_cast, y = const_201_promoted_to_fp16)[name = tensor("op_2091_cast")]; tensor var_2093_interleave_0 = const()[name = tensor("op_2093_interleave_0"), val = tensor(false)]; tensor var_2093_cast = concat(axis = var_10, interleave = var_2093_interleave_0, values = (var_2091_cast, x1_47_cast))[name = tensor("op_2093_cast")]; tensor var_2094_cast = mul(x = var_2093_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_2094_cast")]; tensor hidden_states_341_cast = add(x = var_2080_cast, y = var_2094_cast)[name = tensor("hidden_states_341_cast")]; tensor var_2103_axes_0 = const()[name = tensor("op_2103_axes_0"), val = tensor([2])]; tensor var_2103_cast = expand_dims(axes = var_2103_axes_0, x = hidden_states_341_cast)[name = tensor("op_2103_cast")]; tensor hidden_states_343_reps_0 = const()[name = tensor("hidden_states_343_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_343_cast = tile(reps = hidden_states_343_reps_0, x = var_2103_cast)[name = tensor("hidden_states_343_cast")]; tensor var_2111 = const()[name = tensor("op_2111"), val = tensor([1, 32, 128, 64])]; tensor key_states_71_cast = reshape(shape = var_2111, x = hidden_states_343_cast)[name = tensor("key_states_71_cast")]; tensor var_2120_axes_0 = const()[name = tensor("op_2120_axes_0"), val = tensor([2])]; tensor transpose_52 = transpose(perm = hidden_states_345_perm_0, x = var_2038_cast)[name = tensor("transpose_52")]; tensor var_2120_cast = expand_dims(axes = var_2120_axes_0, x = transpose_52)[name = tensor("op_2120_cast")]; tensor hidden_states_347_reps_0 = const()[name = tensor("hidden_states_347_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_347_cast = tile(reps = hidden_states_347_reps_0, x = var_2120_cast)[name = tensor("hidden_states_347_cast")]; tensor var_2128 = const()[name = tensor("op_2128"), val = tensor([1, 32, 128, 64])]; tensor value_states_47_cast = reshape(shape = var_2128, x = hidden_states_347_cast)[name = tensor("value_states_47_cast")]; tensor var_2130_perm_0 = const()[name = tensor("op_2130_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2131_transpose_x_0 = const()[name = tensor("op_2131_transpose_x_0"), val = tensor(false)]; tensor var_2131_transpose_y_0 = const()[name = tensor("op_2131_transpose_y_0"), val = tensor(false)]; tensor transpose_51 = transpose(perm = var_2130_perm_0, x = key_states_71_cast)[name = tensor("transpose_51")]; tensor var_2131_cast = matmul(transpose_x = var_2131_transpose_x_0, transpose_y = var_2131_transpose_y_0, x = query_states_47_cast, y = transpose_51)[name = tensor("op_2131_cast")]; tensor _inversed_attn_weights_45_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_45_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_45_cast = mul(x = var_2131_cast, y = _inversed_attn_weights_45_y_0_to_fp16)[name = tensor("_inversed_attn_weights_45_cast")]; tensor input_111_cast = add(x = _inversed_attn_weights_45_cast, y = attention_mask_cast)[name = tensor("input_111_cast")]; tensor var_2135_cast = softmax(axis = var_10, x = input_111_cast)[name = tensor("op_2135_cast")]; tensor attn_output_45_transpose_x_0 = const()[name = tensor("attn_output_45_transpose_x_0"), val = tensor(false)]; tensor attn_output_45_transpose_y_0 = const()[name = tensor("attn_output_45_transpose_y_0"), val = tensor(false)]; tensor attn_output_45_cast = matmul(transpose_x = attn_output_45_transpose_x_0, transpose_y = attn_output_45_transpose_y_0, x = var_2135_cast, y = value_states_47_cast)[name = tensor("attn_output_45_cast")]; tensor var_2138_perm_0 = const()[name = tensor("op_2138_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2140 = const()[name = tensor("op_2140"), val = tensor([1, 128, 2048])]; tensor transpose_50 = transpose(perm = var_2138_perm_0, x = attn_output_45_cast)[name = tensor("transpose_50")]; tensor input_113_cast = reshape(shape = var_2140, x = transpose_50)[name = tensor("input_113_cast")]; tensor model_model_layers_11_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(277749568))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279846784))), name = tensor("model_model_layers_11_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_80_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_o_proj_weight_to_fp16_palettized, x = input_113_cast)[name = tensor("linear_80_cast")]; tensor hidden_states_351_cast = add(x = hidden_states_331_cast, y = linear_80_cast)[name = tensor("hidden_states_351_cast")]; tensor var_13_promoted_to_fp16_23 = const()[name = tensor("op_13_promoted_to_fp16_23"), val = tensor(0x1p+1)]; tensor var_2147_cast = pow(x = hidden_states_351_cast, y = var_13_promoted_to_fp16_23)[name = tensor("op_2147_cast")]; tensor var_2148 = const()[name = tensor("op_2148"), val = tensor([-1])]; tensor variance_47_cast = reduce_mean(axes = var_2148, keep_dims = var_23, x = var_2147_cast)[name = tensor("variance_47_cast")]; tensor var_2150_to_fp16 = const()[name = tensor("op_2150_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_2151_cast = add(x = variance_47_cast, y = var_2150_to_fp16)[name = tensor("op_2151_cast")]; tensor var_2152_epsilon_0_to_fp16 = const()[name = tensor("op_2152_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_2152_cast = rsqrt(epsilon = var_2152_epsilon_0_to_fp16, x = var_2151_cast)[name = tensor("op_2152_cast")]; tensor hidden_states_355_cast = mul(x = hidden_states_351_cast, y = var_2152_cast)[name = tensor("hidden_states_355_cast")]; tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279846912)))]; tensor input_115_cast = mul(x = model_model_layers_11_post_attention_layernorm_weight_to_fp16, y = hidden_states_355_cast)[name = tensor("input_115_cast")]; tensor model_model_layers_11_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279851072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285618304))), name = tensor("model_model_layers_11_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_81_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_11_mlp_gate_proj_weight_to_fp16_palettized, x = input_115_cast)[name = tensor("linear_81_cast")]; tensor var_2164_cast = silu(x = linear_81_cast)[name = tensor("op_2164_cast")]; tensor model_model_layers_11_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285618432))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291385664))), name = tensor("model_model_layers_11_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_82_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_11_mlp_up_proj_weight_to_fp16_palettized, x = input_115_cast)[name = tensor("linear_82_cast")]; tensor input_119_cast = mul(x = var_2164_cast, y = linear_82_cast)[name = tensor("input_119_cast")]; tensor model_model_layers_11_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291385792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297153024))), name = tensor("model_model_layers_11_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_83_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_mlp_down_proj_weight_to_fp16_palettized, x = input_119_cast)[name = tensor("linear_83_cast")]; tensor hidden_states_361_cast = add(x = hidden_states_351_cast, y = linear_83_cast)[name = tensor("hidden_states_361_cast")]; tensor var_13_promoted_to_fp16_24 = const()[name = tensor("op_13_promoted_to_fp16_24"), val = tensor(0x1p+1)]; tensor var_2177_cast = pow(x = hidden_states_361_cast, y = var_13_promoted_to_fp16_24)[name = tensor("op_2177_cast")]; tensor var_2178 = const()[name = tensor("op_2178"), val = tensor([-1])]; tensor variance_49_cast = reduce_mean(axes = var_2178, keep_dims = var_23, x = var_2177_cast)[name = tensor("variance_49_cast")]; tensor var_2180_to_fp16 = const()[name = tensor("op_2180_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_2181_cast = add(x = variance_49_cast, y = var_2180_to_fp16)[name = tensor("op_2181_cast")]; tensor var_2182_epsilon_0_to_fp16 = const()[name = tensor("op_2182_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_2182_cast = rsqrt(epsilon = var_2182_epsilon_0_to_fp16, x = var_2181_cast)[name = tensor("op_2182_cast")]; tensor hidden_states_365_cast = mul(x = hidden_states_361_cast, y = var_2182_cast)[name = tensor("hidden_states_365_cast")]; tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297153152)))]; tensor hidden_states_369_cast = mul(x = model_model_layers_12_input_layernorm_weight_to_fp16, y = hidden_states_365_cast)[name = tensor("hidden_states_369_cast")]; tensor model_model_layers_12_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297157312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299254528))), name = tensor("model_model_layers_12_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_84_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_369_cast)[name = tensor("linear_84_cast")]; tensor model_model_layers_12_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299254656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299516864))), name = tensor("model_model_layers_12_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_85_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_12_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_369_cast)[name = tensor("linear_85_cast")]; tensor model_model_layers_12_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299516992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299779200))), name = tensor("model_model_layers_12_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_86_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_12_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_369_cast)[name = tensor("linear_86_cast")]; tensor var_2202 = const()[name = tensor("op_2202"), val = tensor([1, 128, 32, 64])]; tensor var_2203_cast = reshape(shape = var_2202, x = linear_84_cast)[name = tensor("op_2203_cast")]; tensor q_25_perm_0 = const()[name = tensor("q_25_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2205 = const()[name = tensor("op_2205"), val = tensor([1, 128, 4, 64])]; tensor var_2206_cast = reshape(shape = var_2205, x = linear_85_cast)[name = tensor("op_2206_cast")]; tensor key_states_75_perm_0 = const()[name = tensor("key_states_75_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2208 = const()[name = tensor("op_2208"), val = tensor([1, 128, 4, 64])]; tensor var_2209_cast = reshape(shape = var_2208, x = linear_86_cast)[name = tensor("op_2209_cast")]; tensor hidden_states_375_perm_0 = const()[name = tensor("hidden_states_375_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_49 = transpose(perm = q_25_perm_0, x = var_2203_cast)[name = tensor("transpose_49")]; tensor var_2235_cast = mul(x = transpose_49, y = cos_1_to_fp16_palettized)[name = tensor("op_2235_cast")]; tensor x1_49_begin_0 = const()[name = tensor("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = tensor("x1_49_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_49_end_mask_0 = const()[name = tensor("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_49_cast = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = transpose_49)[name = tensor("x1_49_cast")]; tensor x2_49_begin_0 = const()[name = tensor("x2_49_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_49_end_0 = const()[name = tensor("x2_49_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_49_end_mask_0 = const()[name = tensor("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49_cast = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = transpose_49)[name = tensor("x2_49_cast")]; tensor const_215_promoted_to_fp16 = const()[name = tensor("const_215_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_2246_cast = mul(x = x2_49_cast, y = const_215_promoted_to_fp16)[name = tensor("op_2246_cast")]; tensor var_2248_interleave_0 = const()[name = tensor("op_2248_interleave_0"), val = tensor(false)]; tensor var_2248_cast = concat(axis = var_10, interleave = var_2248_interleave_0, values = (var_2246_cast, x1_49_cast))[name = tensor("op_2248_cast")]; tensor var_2249_cast = mul(x = var_2248_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_2249_cast")]; tensor query_states_51_cast = add(x = var_2235_cast, y = var_2249_cast)[name = tensor("query_states_51_cast")]; tensor transpose_48 = transpose(perm = key_states_75_perm_0, x = var_2206_cast)[name = tensor("transpose_48")]; tensor var_2251_cast = mul(x = transpose_48, y = cos_1_to_fp16_palettized)[name = tensor("op_2251_cast")]; tensor x1_51_begin_0 = const()[name = tensor("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = tensor("x1_51_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_51_end_mask_0 = const()[name = tensor("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_51_cast = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = transpose_48)[name = tensor("x1_51_cast")]; tensor x2_51_begin_0 = const()[name = tensor("x2_51_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_51_end_0 = const()[name = tensor("x2_51_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_51_end_mask_0 = const()[name = tensor("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51_cast = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = transpose_48)[name = tensor("x2_51_cast")]; tensor const_218_promoted_to_fp16 = const()[name = tensor("const_218_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_2262_cast = mul(x = x2_51_cast, y = const_218_promoted_to_fp16)[name = tensor("op_2262_cast")]; tensor var_2264_interleave_0 = const()[name = tensor("op_2264_interleave_0"), val = tensor(false)]; tensor var_2264_cast = concat(axis = var_10, interleave = var_2264_interleave_0, values = (var_2262_cast, x1_51_cast))[name = tensor("op_2264_cast")]; tensor var_2265_cast = mul(x = var_2264_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_2265_cast")]; tensor hidden_states_371_cast = add(x = var_2251_cast, y = var_2265_cast)[name = tensor("hidden_states_371_cast")]; tensor var_2274_axes_0 = const()[name = tensor("op_2274_axes_0"), val = tensor([2])]; tensor var_2274_cast = expand_dims(axes = var_2274_axes_0, x = hidden_states_371_cast)[name = tensor("op_2274_cast")]; tensor hidden_states_373_reps_0 = const()[name = tensor("hidden_states_373_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_373_cast = tile(reps = hidden_states_373_reps_0, x = var_2274_cast)[name = tensor("hidden_states_373_cast")]; tensor var_2282 = const()[name = tensor("op_2282"), val = tensor([1, 32, 128, 64])]; tensor key_states_77_cast = reshape(shape = var_2282, x = hidden_states_373_cast)[name = tensor("key_states_77_cast")]; tensor var_2291_axes_0 = const()[name = tensor("op_2291_axes_0"), val = tensor([2])]; tensor transpose_47 = transpose(perm = hidden_states_375_perm_0, x = var_2209_cast)[name = tensor("transpose_47")]; tensor var_2291_cast = expand_dims(axes = var_2291_axes_0, x = transpose_47)[name = tensor("op_2291_cast")]; tensor hidden_states_377_reps_0 = const()[name = tensor("hidden_states_377_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_377_cast = tile(reps = hidden_states_377_reps_0, x = var_2291_cast)[name = tensor("hidden_states_377_cast")]; tensor var_2299 = const()[name = tensor("op_2299"), val = tensor([1, 32, 128, 64])]; tensor value_states_51_cast = reshape(shape = var_2299, x = hidden_states_377_cast)[name = tensor("value_states_51_cast")]; tensor var_2301_perm_0 = const()[name = tensor("op_2301_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2302_transpose_x_0 = const()[name = tensor("op_2302_transpose_x_0"), val = tensor(false)]; tensor var_2302_transpose_y_0 = const()[name = tensor("op_2302_transpose_y_0"), val = tensor(false)]; tensor transpose_46 = transpose(perm = var_2301_perm_0, x = key_states_77_cast)[name = tensor("transpose_46")]; tensor var_2302_cast = matmul(transpose_x = var_2302_transpose_x_0, transpose_y = var_2302_transpose_y_0, x = query_states_51_cast, y = transpose_46)[name = tensor("op_2302_cast")]; tensor _inversed_attn_weights_49_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_49_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_49_cast = mul(x = var_2302_cast, y = _inversed_attn_weights_49_y_0_to_fp16)[name = tensor("_inversed_attn_weights_49_cast")]; tensor input_121_cast = add(x = _inversed_attn_weights_49_cast, y = attention_mask_cast)[name = tensor("input_121_cast")]; tensor var_2306_cast = softmax(axis = var_10, x = input_121_cast)[name = tensor("op_2306_cast")]; tensor attn_output_49_transpose_x_0 = const()[name = tensor("attn_output_49_transpose_x_0"), val = tensor(false)]; tensor attn_output_49_transpose_y_0 = const()[name = tensor("attn_output_49_transpose_y_0"), val = tensor(false)]; tensor attn_output_49_cast = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = var_2306_cast, y = value_states_51_cast)[name = tensor("attn_output_49_cast")]; tensor var_2309_perm_0 = const()[name = tensor("op_2309_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2311 = const()[name = tensor("op_2311"), val = tensor([1, 128, 2048])]; tensor transpose_45 = transpose(perm = var_2309_perm_0, x = attn_output_49_cast)[name = tensor("transpose_45")]; tensor input_123_cast = reshape(shape = var_2311, x = transpose_45)[name = tensor("input_123_cast")]; tensor model_model_layers_12_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299779328))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(301876544))), name = tensor("model_model_layers_12_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_87_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_to_fp16_palettized, x = input_123_cast)[name = tensor("linear_87_cast")]; tensor hidden_states_381_cast = add(x = hidden_states_361_cast, y = linear_87_cast)[name = tensor("hidden_states_381_cast")]; tensor var_13_promoted_to_fp16_25 = const()[name = tensor("op_13_promoted_to_fp16_25"), val = tensor(0x1p+1)]; tensor var_2318_cast = pow(x = hidden_states_381_cast, y = var_13_promoted_to_fp16_25)[name = tensor("op_2318_cast")]; tensor var_2319 = const()[name = tensor("op_2319"), val = tensor([-1])]; tensor variance_51_cast = reduce_mean(axes = var_2319, keep_dims = var_23, x = var_2318_cast)[name = tensor("variance_51_cast")]; tensor var_2321_to_fp16 = const()[name = tensor("op_2321_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_2322_cast = add(x = variance_51_cast, y = var_2321_to_fp16)[name = tensor("op_2322_cast")]; tensor var_2323_epsilon_0_to_fp16 = const()[name = tensor("op_2323_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_2323_cast = rsqrt(epsilon = var_2323_epsilon_0_to_fp16, x = var_2322_cast)[name = tensor("op_2323_cast")]; tensor hidden_states_385_cast = mul(x = hidden_states_381_cast, y = var_2323_cast)[name = tensor("hidden_states_385_cast")]; tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(301876672)))]; tensor input_125_cast = mul(x = model_model_layers_12_post_attention_layernorm_weight_to_fp16, y = hidden_states_385_cast)[name = tensor("input_125_cast")]; tensor model_model_layers_12_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(301880832))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307648064))), name = tensor("model_model_layers_12_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_88_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_12_mlp_gate_proj_weight_to_fp16_palettized, x = input_125_cast)[name = tensor("linear_88_cast")]; tensor var_2335_cast = silu(x = linear_88_cast)[name = tensor("op_2335_cast")]; tensor model_model_layers_12_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(307648192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313415424))), name = tensor("model_model_layers_12_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_89_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_12_mlp_up_proj_weight_to_fp16_palettized, x = input_125_cast)[name = tensor("linear_89_cast")]; tensor input_129_cast = mul(x = var_2335_cast, y = linear_89_cast)[name = tensor("input_129_cast")]; tensor model_model_layers_12_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313415552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(319182784))), name = tensor("model_model_layers_12_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_90_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_mlp_down_proj_weight_to_fp16_palettized, x = input_129_cast)[name = tensor("linear_90_cast")]; tensor hidden_states_391_cast = add(x = hidden_states_381_cast, y = linear_90_cast)[name = tensor("hidden_states_391_cast")]; tensor var_13_promoted_to_fp16_26 = const()[name = tensor("op_13_promoted_to_fp16_26"), val = tensor(0x1p+1)]; tensor var_2348_cast = pow(x = hidden_states_391_cast, y = var_13_promoted_to_fp16_26)[name = tensor("op_2348_cast")]; tensor var_2349 = const()[name = tensor("op_2349"), val = tensor([-1])]; tensor variance_53_cast = reduce_mean(axes = var_2349, keep_dims = var_23, x = var_2348_cast)[name = tensor("variance_53_cast")]; tensor var_2351_to_fp16 = const()[name = tensor("op_2351_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_2352_cast = add(x = variance_53_cast, y = var_2351_to_fp16)[name = tensor("op_2352_cast")]; tensor var_2353_epsilon_0_to_fp16 = const()[name = tensor("op_2353_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_2353_cast = rsqrt(epsilon = var_2353_epsilon_0_to_fp16, x = var_2352_cast)[name = tensor("op_2353_cast")]; tensor hidden_states_395_cast = mul(x = hidden_states_391_cast, y = var_2353_cast)[name = tensor("hidden_states_395_cast")]; tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(319182912)))]; tensor hidden_states_399_cast = mul(x = model_model_layers_13_input_layernorm_weight_to_fp16, y = hidden_states_395_cast)[name = tensor("hidden_states_399_cast")]; tensor model_model_layers_13_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(319187072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(321284288))), name = tensor("model_model_layers_13_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_91_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_399_cast)[name = tensor("linear_91_cast")]; tensor model_model_layers_13_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(321284416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(321546624))), name = tensor("model_model_layers_13_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_92_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_13_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_399_cast)[name = tensor("linear_92_cast")]; tensor model_model_layers_13_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(321546752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(321808960))), name = tensor("model_model_layers_13_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_93_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_13_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_399_cast)[name = tensor("linear_93_cast")]; tensor var_2373 = const()[name = tensor("op_2373"), val = tensor([1, 128, 32, 64])]; tensor var_2374_cast = reshape(shape = var_2373, x = linear_91_cast)[name = tensor("op_2374_cast")]; tensor q_27_perm_0 = const()[name = tensor("q_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2376 = const()[name = tensor("op_2376"), val = tensor([1, 128, 4, 64])]; tensor var_2377_cast = reshape(shape = var_2376, x = linear_92_cast)[name = tensor("op_2377_cast")]; tensor key_states_81_perm_0 = const()[name = tensor("key_states_81_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2379 = const()[name = tensor("op_2379"), val = tensor([1, 128, 4, 64])]; tensor var_2380_cast = reshape(shape = var_2379, x = linear_93_cast)[name = tensor("op_2380_cast")]; tensor hidden_states_405_perm_0 = const()[name = tensor("hidden_states_405_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_44 = transpose(perm = q_27_perm_0, x = var_2374_cast)[name = tensor("transpose_44")]; tensor var_2406_cast = mul(x = transpose_44, y = cos_1_to_fp16_palettized)[name = tensor("op_2406_cast")]; tensor x1_53_begin_0 = const()[name = tensor("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = tensor("x1_53_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_53_end_mask_0 = const()[name = tensor("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_53_cast = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = transpose_44)[name = tensor("x1_53_cast")]; tensor x2_53_begin_0 = const()[name = tensor("x2_53_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_53_end_0 = const()[name = tensor("x2_53_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_53_end_mask_0 = const()[name = tensor("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53_cast = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = transpose_44)[name = tensor("x2_53_cast")]; tensor const_232_promoted_to_fp16 = const()[name = tensor("const_232_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_2417_cast = mul(x = x2_53_cast, y = const_232_promoted_to_fp16)[name = tensor("op_2417_cast")]; tensor var_2419_interleave_0 = const()[name = tensor("op_2419_interleave_0"), val = tensor(false)]; tensor var_2419_cast = concat(axis = var_10, interleave = var_2419_interleave_0, values = (var_2417_cast, x1_53_cast))[name = tensor("op_2419_cast")]; tensor var_2420_cast = mul(x = var_2419_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_2420_cast")]; tensor query_states_55_cast = add(x = var_2406_cast, y = var_2420_cast)[name = tensor("query_states_55_cast")]; tensor transpose_43 = transpose(perm = key_states_81_perm_0, x = var_2377_cast)[name = tensor("transpose_43")]; tensor var_2422_cast = mul(x = transpose_43, y = cos_1_to_fp16_palettized)[name = tensor("op_2422_cast")]; tensor x1_55_begin_0 = const()[name = tensor("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = tensor("x1_55_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_55_end_mask_0 = const()[name = tensor("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_55_cast = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = transpose_43)[name = tensor("x1_55_cast")]; tensor x2_55_begin_0 = const()[name = tensor("x2_55_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_55_end_0 = const()[name = tensor("x2_55_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_55_end_mask_0 = const()[name = tensor("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_55_cast = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = transpose_43)[name = tensor("x2_55_cast")]; tensor const_235_promoted_to_fp16 = const()[name = tensor("const_235_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_2433_cast = mul(x = x2_55_cast, y = const_235_promoted_to_fp16)[name = tensor("op_2433_cast")]; tensor var_2435_interleave_0 = const()[name = tensor("op_2435_interleave_0"), val = tensor(false)]; tensor var_2435_cast = concat(axis = var_10, interleave = var_2435_interleave_0, values = (var_2433_cast, x1_55_cast))[name = tensor("op_2435_cast")]; tensor var_2436_cast = mul(x = var_2435_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_2436_cast")]; tensor hidden_states_401_cast = add(x = var_2422_cast, y = var_2436_cast)[name = tensor("hidden_states_401_cast")]; tensor var_2445_axes_0 = const()[name = tensor("op_2445_axes_0"), val = tensor([2])]; tensor var_2445_cast = expand_dims(axes = var_2445_axes_0, x = hidden_states_401_cast)[name = tensor("op_2445_cast")]; tensor hidden_states_403_reps_0 = const()[name = tensor("hidden_states_403_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_403_cast = tile(reps = hidden_states_403_reps_0, x = var_2445_cast)[name = tensor("hidden_states_403_cast")]; tensor var_2453 = const()[name = tensor("op_2453"), val = tensor([1, 32, 128, 64])]; tensor key_states_83_cast = reshape(shape = var_2453, x = hidden_states_403_cast)[name = tensor("key_states_83_cast")]; tensor var_2462_axes_0 = const()[name = tensor("op_2462_axes_0"), val = tensor([2])]; tensor transpose_42 = transpose(perm = hidden_states_405_perm_0, x = var_2380_cast)[name = tensor("transpose_42")]; tensor var_2462_cast = expand_dims(axes = var_2462_axes_0, x = transpose_42)[name = tensor("op_2462_cast")]; tensor hidden_states_407_reps_0 = const()[name = tensor("hidden_states_407_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_407_cast = tile(reps = hidden_states_407_reps_0, x = var_2462_cast)[name = tensor("hidden_states_407_cast")]; tensor var_2470 = const()[name = tensor("op_2470"), val = tensor([1, 32, 128, 64])]; tensor value_states_55_cast = reshape(shape = var_2470, x = hidden_states_407_cast)[name = tensor("value_states_55_cast")]; tensor var_2472_perm_0 = const()[name = tensor("op_2472_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2473_transpose_x_0 = const()[name = tensor("op_2473_transpose_x_0"), val = tensor(false)]; tensor var_2473_transpose_y_0 = const()[name = tensor("op_2473_transpose_y_0"), val = tensor(false)]; tensor transpose_41 = transpose(perm = var_2472_perm_0, x = key_states_83_cast)[name = tensor("transpose_41")]; tensor var_2473_cast = matmul(transpose_x = var_2473_transpose_x_0, transpose_y = var_2473_transpose_y_0, x = query_states_55_cast, y = transpose_41)[name = tensor("op_2473_cast")]; tensor _inversed_attn_weights_53_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_53_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_53_cast = mul(x = var_2473_cast, y = _inversed_attn_weights_53_y_0_to_fp16)[name = tensor("_inversed_attn_weights_53_cast")]; tensor input_131_cast = add(x = _inversed_attn_weights_53_cast, y = attention_mask_cast)[name = tensor("input_131_cast")]; tensor var_2477_cast = softmax(axis = var_10, x = input_131_cast)[name = tensor("op_2477_cast")]; tensor attn_output_53_transpose_x_0 = const()[name = tensor("attn_output_53_transpose_x_0"), val = tensor(false)]; tensor attn_output_53_transpose_y_0 = const()[name = tensor("attn_output_53_transpose_y_0"), val = tensor(false)]; tensor attn_output_53_cast = matmul(transpose_x = attn_output_53_transpose_x_0, transpose_y = attn_output_53_transpose_y_0, x = var_2477_cast, y = value_states_55_cast)[name = tensor("attn_output_53_cast")]; tensor var_2480_perm_0 = const()[name = tensor("op_2480_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2482 = const()[name = tensor("op_2482"), val = tensor([1, 128, 2048])]; tensor transpose_40 = transpose(perm = var_2480_perm_0, x = attn_output_53_cast)[name = tensor("transpose_40")]; tensor input_133_cast = reshape(shape = var_2482, x = transpose_40)[name = tensor("input_133_cast")]; tensor model_model_layers_13_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(321809088))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323906304))), name = tensor("model_model_layers_13_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_94_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_to_fp16_palettized, x = input_133_cast)[name = tensor("linear_94_cast")]; tensor hidden_states_411_cast = add(x = hidden_states_391_cast, y = linear_94_cast)[name = tensor("hidden_states_411_cast")]; tensor var_13_promoted_to_fp16_27 = const()[name = tensor("op_13_promoted_to_fp16_27"), val = tensor(0x1p+1)]; tensor var_2489_cast = pow(x = hidden_states_411_cast, y = var_13_promoted_to_fp16_27)[name = tensor("op_2489_cast")]; tensor var_2490 = const()[name = tensor("op_2490"), val = tensor([-1])]; tensor variance_55_cast = reduce_mean(axes = var_2490, keep_dims = var_23, x = var_2489_cast)[name = tensor("variance_55_cast")]; tensor var_2492_to_fp16 = const()[name = tensor("op_2492_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_2493_cast = add(x = variance_55_cast, y = var_2492_to_fp16)[name = tensor("op_2493_cast")]; tensor var_2494_epsilon_0_to_fp16 = const()[name = tensor("op_2494_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_2494_cast = rsqrt(epsilon = var_2494_epsilon_0_to_fp16, x = var_2493_cast)[name = tensor("op_2494_cast")]; tensor hidden_states_415_cast = mul(x = hidden_states_411_cast, y = var_2494_cast)[name = tensor("hidden_states_415_cast")]; tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323906432)))]; tensor input_135_cast = mul(x = model_model_layers_13_post_attention_layernorm_weight_to_fp16, y = hidden_states_415_cast)[name = tensor("input_135_cast")]; tensor model_model_layers_13_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323910592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(329677824))), name = tensor("model_model_layers_13_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_95_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_13_mlp_gate_proj_weight_to_fp16_palettized, x = input_135_cast)[name = tensor("linear_95_cast")]; tensor var_2506_cast = silu(x = linear_95_cast)[name = tensor("op_2506_cast")]; tensor model_model_layers_13_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(329677952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335445184))), name = tensor("model_model_layers_13_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_96_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_13_mlp_up_proj_weight_to_fp16_palettized, x = input_135_cast)[name = tensor("linear_96_cast")]; tensor input_139_cast = mul(x = var_2506_cast, y = linear_96_cast)[name = tensor("input_139_cast")]; tensor model_model_layers_13_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335445312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(341212544))), name = tensor("model_model_layers_13_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_97_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_mlp_down_proj_weight_to_fp16_palettized, x = input_139_cast)[name = tensor("linear_97_cast")]; tensor hidden_states_421_cast = add(x = hidden_states_411_cast, y = linear_97_cast)[name = tensor("hidden_states_421_cast")]; tensor var_13_promoted_to_fp16_28 = const()[name = tensor("op_13_promoted_to_fp16_28"), val = tensor(0x1p+1)]; tensor var_2519_cast = pow(x = hidden_states_421_cast, y = var_13_promoted_to_fp16_28)[name = tensor("op_2519_cast")]; tensor var_2520 = const()[name = tensor("op_2520"), val = tensor([-1])]; tensor variance_57_cast = reduce_mean(axes = var_2520, keep_dims = var_23, x = var_2519_cast)[name = tensor("variance_57_cast")]; tensor var_2522_to_fp16 = const()[name = tensor("op_2522_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_2523_cast = add(x = variance_57_cast, y = var_2522_to_fp16)[name = tensor("op_2523_cast")]; tensor var_2524_epsilon_0_to_fp16 = const()[name = tensor("op_2524_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_2524_cast = rsqrt(epsilon = var_2524_epsilon_0_to_fp16, x = var_2523_cast)[name = tensor("op_2524_cast")]; tensor hidden_states_425_cast = mul(x = hidden_states_421_cast, y = var_2524_cast)[name = tensor("hidden_states_425_cast")]; tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(341212672)))]; tensor hidden_states_429_cast = mul(x = model_model_layers_14_input_layernorm_weight_to_fp16, y = hidden_states_425_cast)[name = tensor("hidden_states_429_cast")]; tensor model_model_layers_14_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(341216832))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(343314048))), name = tensor("model_model_layers_14_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_98_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_429_cast)[name = tensor("linear_98_cast")]; tensor model_model_layers_14_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(343314176))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(343576384))), name = tensor("model_model_layers_14_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_99_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_14_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_429_cast)[name = tensor("linear_99_cast")]; tensor model_model_layers_14_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(343576512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(343838720))), name = tensor("model_model_layers_14_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_100_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_14_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_429_cast)[name = tensor("linear_100_cast")]; tensor var_2544 = const()[name = tensor("op_2544"), val = tensor([1, 128, 32, 64])]; tensor var_2545_cast = reshape(shape = var_2544, x = linear_98_cast)[name = tensor("op_2545_cast")]; tensor q_29_perm_0 = const()[name = tensor("q_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2547 = const()[name = tensor("op_2547"), val = tensor([1, 128, 4, 64])]; tensor var_2548_cast = reshape(shape = var_2547, x = linear_99_cast)[name = tensor("op_2548_cast")]; tensor key_states_87_perm_0 = const()[name = tensor("key_states_87_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2550 = const()[name = tensor("op_2550"), val = tensor([1, 128, 4, 64])]; tensor var_2551_cast = reshape(shape = var_2550, x = linear_100_cast)[name = tensor("op_2551_cast")]; tensor hidden_states_435_perm_0 = const()[name = tensor("hidden_states_435_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_39 = transpose(perm = q_29_perm_0, x = var_2545_cast)[name = tensor("transpose_39")]; tensor var_2577_cast = mul(x = transpose_39, y = cos_1_to_fp16_palettized)[name = tensor("op_2577_cast")]; tensor x1_57_begin_0 = const()[name = tensor("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = tensor("x1_57_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_57_end_mask_0 = const()[name = tensor("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_57_cast = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = transpose_39)[name = tensor("x1_57_cast")]; tensor x2_57_begin_0 = const()[name = tensor("x2_57_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_57_end_0 = const()[name = tensor("x2_57_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_57_end_mask_0 = const()[name = tensor("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_57_cast = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = transpose_39)[name = tensor("x2_57_cast")]; tensor const_249_promoted_to_fp16 = const()[name = tensor("const_249_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_2588_cast = mul(x = x2_57_cast, y = const_249_promoted_to_fp16)[name = tensor("op_2588_cast")]; tensor var_2590_interleave_0 = const()[name = tensor("op_2590_interleave_0"), val = tensor(false)]; tensor var_2590_cast = concat(axis = var_10, interleave = var_2590_interleave_0, values = (var_2588_cast, x1_57_cast))[name = tensor("op_2590_cast")]; tensor var_2591_cast = mul(x = var_2590_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_2591_cast")]; tensor query_states_59_cast = add(x = var_2577_cast, y = var_2591_cast)[name = tensor("query_states_59_cast")]; tensor transpose_38 = transpose(perm = key_states_87_perm_0, x = var_2548_cast)[name = tensor("transpose_38")]; tensor var_2593_cast = mul(x = transpose_38, y = cos_1_to_fp16_palettized)[name = tensor("op_2593_cast")]; tensor x1_59_begin_0 = const()[name = tensor("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = tensor("x1_59_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_59_end_mask_0 = const()[name = tensor("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_59_cast = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = transpose_38)[name = tensor("x1_59_cast")]; tensor x2_59_begin_0 = const()[name = tensor("x2_59_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_59_end_0 = const()[name = tensor("x2_59_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_59_end_mask_0 = const()[name = tensor("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_59_cast = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = transpose_38)[name = tensor("x2_59_cast")]; tensor const_252_promoted_to_fp16 = const()[name = tensor("const_252_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_2604_cast = mul(x = x2_59_cast, y = const_252_promoted_to_fp16)[name = tensor("op_2604_cast")]; tensor var_2606_interleave_0 = const()[name = tensor("op_2606_interleave_0"), val = tensor(false)]; tensor var_2606_cast = concat(axis = var_10, interleave = var_2606_interleave_0, values = (var_2604_cast, x1_59_cast))[name = tensor("op_2606_cast")]; tensor var_2607_cast = mul(x = var_2606_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_2607_cast")]; tensor hidden_states_431_cast = add(x = var_2593_cast, y = var_2607_cast)[name = tensor("hidden_states_431_cast")]; tensor var_2616_axes_0 = const()[name = tensor("op_2616_axes_0"), val = tensor([2])]; tensor var_2616_cast = expand_dims(axes = var_2616_axes_0, x = hidden_states_431_cast)[name = tensor("op_2616_cast")]; tensor hidden_states_433_reps_0 = const()[name = tensor("hidden_states_433_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_433_cast = tile(reps = hidden_states_433_reps_0, x = var_2616_cast)[name = tensor("hidden_states_433_cast")]; tensor var_2624 = const()[name = tensor("op_2624"), val = tensor([1, 32, 128, 64])]; tensor key_states_89_cast = reshape(shape = var_2624, x = hidden_states_433_cast)[name = tensor("key_states_89_cast")]; tensor var_2633_axes_0 = const()[name = tensor("op_2633_axes_0"), val = tensor([2])]; tensor transpose_37 = transpose(perm = hidden_states_435_perm_0, x = var_2551_cast)[name = tensor("transpose_37")]; tensor var_2633_cast = expand_dims(axes = var_2633_axes_0, x = transpose_37)[name = tensor("op_2633_cast")]; tensor hidden_states_437_reps_0 = const()[name = tensor("hidden_states_437_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_437_cast = tile(reps = hidden_states_437_reps_0, x = var_2633_cast)[name = tensor("hidden_states_437_cast")]; tensor var_2641 = const()[name = tensor("op_2641"), val = tensor([1, 32, 128, 64])]; tensor value_states_59_cast = reshape(shape = var_2641, x = hidden_states_437_cast)[name = tensor("value_states_59_cast")]; tensor var_2643_perm_0 = const()[name = tensor("op_2643_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2644_transpose_x_0 = const()[name = tensor("op_2644_transpose_x_0"), val = tensor(false)]; tensor var_2644_transpose_y_0 = const()[name = tensor("op_2644_transpose_y_0"), val = tensor(false)]; tensor transpose_36 = transpose(perm = var_2643_perm_0, x = key_states_89_cast)[name = tensor("transpose_36")]; tensor var_2644_cast = matmul(transpose_x = var_2644_transpose_x_0, transpose_y = var_2644_transpose_y_0, x = query_states_59_cast, y = transpose_36)[name = tensor("op_2644_cast")]; tensor _inversed_attn_weights_57_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_57_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_57_cast = mul(x = var_2644_cast, y = _inversed_attn_weights_57_y_0_to_fp16)[name = tensor("_inversed_attn_weights_57_cast")]; tensor input_141_cast = add(x = _inversed_attn_weights_57_cast, y = attention_mask_cast)[name = tensor("input_141_cast")]; tensor var_2648_cast = softmax(axis = var_10, x = input_141_cast)[name = tensor("op_2648_cast")]; tensor attn_output_57_transpose_x_0 = const()[name = tensor("attn_output_57_transpose_x_0"), val = tensor(false)]; tensor attn_output_57_transpose_y_0 = const()[name = tensor("attn_output_57_transpose_y_0"), val = tensor(false)]; tensor attn_output_57_cast = matmul(transpose_x = attn_output_57_transpose_x_0, transpose_y = attn_output_57_transpose_y_0, x = var_2648_cast, y = value_states_59_cast)[name = tensor("attn_output_57_cast")]; tensor var_2651_perm_0 = const()[name = tensor("op_2651_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2653 = const()[name = tensor("op_2653"), val = tensor([1, 128, 2048])]; tensor transpose_35 = transpose(perm = var_2651_perm_0, x = attn_output_57_cast)[name = tensor("transpose_35")]; tensor input_143_cast = reshape(shape = var_2653, x = transpose_35)[name = tensor("input_143_cast")]; tensor model_model_layers_14_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(343838848))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(345936064))), name = tensor("model_model_layers_14_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_101_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_to_fp16_palettized, x = input_143_cast)[name = tensor("linear_101_cast")]; tensor hidden_states_441_cast = add(x = hidden_states_421_cast, y = linear_101_cast)[name = tensor("hidden_states_441_cast")]; tensor var_13_promoted_to_fp16_29 = const()[name = tensor("op_13_promoted_to_fp16_29"), val = tensor(0x1p+1)]; tensor var_2660_cast = pow(x = hidden_states_441_cast, y = var_13_promoted_to_fp16_29)[name = tensor("op_2660_cast")]; tensor var_2661 = const()[name = tensor("op_2661"), val = tensor([-1])]; tensor variance_59_cast = reduce_mean(axes = var_2661, keep_dims = var_23, x = var_2660_cast)[name = tensor("variance_59_cast")]; tensor var_2663_to_fp16 = const()[name = tensor("op_2663_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_2664_cast = add(x = variance_59_cast, y = var_2663_to_fp16)[name = tensor("op_2664_cast")]; tensor var_2665_epsilon_0_to_fp16 = const()[name = tensor("op_2665_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_2665_cast = rsqrt(epsilon = var_2665_epsilon_0_to_fp16, x = var_2664_cast)[name = tensor("op_2665_cast")]; tensor hidden_states_445_cast = mul(x = hidden_states_441_cast, y = var_2665_cast)[name = tensor("hidden_states_445_cast")]; tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(345936192)))]; tensor input_145_cast = mul(x = model_model_layers_14_post_attention_layernorm_weight_to_fp16, y = hidden_states_445_cast)[name = tensor("input_145_cast")]; tensor model_model_layers_14_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(345940352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(351707584))), name = tensor("model_model_layers_14_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_102_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_14_mlp_gate_proj_weight_to_fp16_palettized, x = input_145_cast)[name = tensor("linear_102_cast")]; tensor var_2677_cast = silu(x = linear_102_cast)[name = tensor("op_2677_cast")]; tensor model_model_layers_14_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(351707712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357474944))), name = tensor("model_model_layers_14_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_103_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_14_mlp_up_proj_weight_to_fp16_palettized, x = input_145_cast)[name = tensor("linear_103_cast")]; tensor input_149_cast = mul(x = var_2677_cast, y = linear_103_cast)[name = tensor("input_149_cast")]; tensor model_model_layers_14_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357475072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363242304))), name = tensor("model_model_layers_14_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_104_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_mlp_down_proj_weight_to_fp16_palettized, x = input_149_cast)[name = tensor("linear_104_cast")]; tensor hidden_states_451_cast = add(x = hidden_states_441_cast, y = linear_104_cast)[name = tensor("hidden_states_451_cast")]; tensor var_13_promoted_to_fp16_30 = const()[name = tensor("op_13_promoted_to_fp16_30"), val = tensor(0x1p+1)]; tensor var_2690_cast = pow(x = hidden_states_451_cast, y = var_13_promoted_to_fp16_30)[name = tensor("op_2690_cast")]; tensor var_2691 = const()[name = tensor("op_2691"), val = tensor([-1])]; tensor variance_61_cast = reduce_mean(axes = var_2691, keep_dims = var_23, x = var_2690_cast)[name = tensor("variance_61_cast")]; tensor var_2693_to_fp16 = const()[name = tensor("op_2693_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_2694_cast = add(x = variance_61_cast, y = var_2693_to_fp16)[name = tensor("op_2694_cast")]; tensor var_2695_epsilon_0_to_fp16 = const()[name = tensor("op_2695_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_2695_cast = rsqrt(epsilon = var_2695_epsilon_0_to_fp16, x = var_2694_cast)[name = tensor("op_2695_cast")]; tensor hidden_states_455_cast = mul(x = hidden_states_451_cast, y = var_2695_cast)[name = tensor("hidden_states_455_cast")]; tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363242432)))]; tensor hidden_states_459_cast = mul(x = model_model_layers_15_input_layernorm_weight_to_fp16, y = hidden_states_455_cast)[name = tensor("hidden_states_459_cast")]; tensor model_model_layers_15_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363246592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(365343808))), name = tensor("model_model_layers_15_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_105_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_459_cast)[name = tensor("linear_105_cast")]; tensor model_model_layers_15_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(365343936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(365606144))), name = tensor("model_model_layers_15_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_106_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_15_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_459_cast)[name = tensor("linear_106_cast")]; tensor model_model_layers_15_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(365606272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(365868480))), name = tensor("model_model_layers_15_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_107_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_15_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_459_cast)[name = tensor("linear_107_cast")]; tensor var_2715 = const()[name = tensor("op_2715"), val = tensor([1, 128, 32, 64])]; tensor var_2716_cast = reshape(shape = var_2715, x = linear_105_cast)[name = tensor("op_2716_cast")]; tensor q_31_perm_0 = const()[name = tensor("q_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2718 = const()[name = tensor("op_2718"), val = tensor([1, 128, 4, 64])]; tensor var_2719_cast = reshape(shape = var_2718, x = linear_106_cast)[name = tensor("op_2719_cast")]; tensor key_states_93_perm_0 = const()[name = tensor("key_states_93_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2721 = const()[name = tensor("op_2721"), val = tensor([1, 128, 4, 64])]; tensor var_2722_cast = reshape(shape = var_2721, x = linear_107_cast)[name = tensor("op_2722_cast")]; tensor hidden_states_465_perm_0 = const()[name = tensor("hidden_states_465_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_34 = transpose(perm = q_31_perm_0, x = var_2716_cast)[name = tensor("transpose_34")]; tensor var_2748_cast = mul(x = transpose_34, y = cos_1_to_fp16_palettized)[name = tensor("op_2748_cast")]; tensor x1_61_begin_0 = const()[name = tensor("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = tensor("x1_61_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_61_end_mask_0 = const()[name = tensor("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_61_cast = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = transpose_34)[name = tensor("x1_61_cast")]; tensor x2_61_begin_0 = const()[name = tensor("x2_61_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_61_end_0 = const()[name = tensor("x2_61_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_61_end_mask_0 = const()[name = tensor("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_61_cast = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = transpose_34)[name = tensor("x2_61_cast")]; tensor const_266_promoted_to_fp16 = const()[name = tensor("const_266_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_2759_cast = mul(x = x2_61_cast, y = const_266_promoted_to_fp16)[name = tensor("op_2759_cast")]; tensor var_2761_interleave_0 = const()[name = tensor("op_2761_interleave_0"), val = tensor(false)]; tensor var_2761_cast = concat(axis = var_10, interleave = var_2761_interleave_0, values = (var_2759_cast, x1_61_cast))[name = tensor("op_2761_cast")]; tensor var_2762_cast = mul(x = var_2761_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_2762_cast")]; tensor query_states_63_cast = add(x = var_2748_cast, y = var_2762_cast)[name = tensor("query_states_63_cast")]; tensor transpose_33 = transpose(perm = key_states_93_perm_0, x = var_2719_cast)[name = tensor("transpose_33")]; tensor var_2764_cast = mul(x = transpose_33, y = cos_1_to_fp16_palettized)[name = tensor("op_2764_cast")]; tensor x1_63_begin_0 = const()[name = tensor("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_63_end_0 = const()[name = tensor("x1_63_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_63_end_mask_0 = const()[name = tensor("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_63_cast = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = transpose_33)[name = tensor("x1_63_cast")]; tensor x2_63_begin_0 = const()[name = tensor("x2_63_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_63_end_0 = const()[name = tensor("x2_63_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_63_end_mask_0 = const()[name = tensor("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_63_cast = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = transpose_33)[name = tensor("x2_63_cast")]; tensor const_269_promoted_to_fp16 = const()[name = tensor("const_269_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_2775_cast = mul(x = x2_63_cast, y = const_269_promoted_to_fp16)[name = tensor("op_2775_cast")]; tensor var_2777_interleave_0 = const()[name = tensor("op_2777_interleave_0"), val = tensor(false)]; tensor var_2777_cast = concat(axis = var_10, interleave = var_2777_interleave_0, values = (var_2775_cast, x1_63_cast))[name = tensor("op_2777_cast")]; tensor var_2778_cast = mul(x = var_2777_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_2778_cast")]; tensor hidden_states_461_cast = add(x = var_2764_cast, y = var_2778_cast)[name = tensor("hidden_states_461_cast")]; tensor var_2787_axes_0 = const()[name = tensor("op_2787_axes_0"), val = tensor([2])]; tensor var_2787_cast = expand_dims(axes = var_2787_axes_0, x = hidden_states_461_cast)[name = tensor("op_2787_cast")]; tensor hidden_states_463_reps_0 = const()[name = tensor("hidden_states_463_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_463_cast = tile(reps = hidden_states_463_reps_0, x = var_2787_cast)[name = tensor("hidden_states_463_cast")]; tensor var_2795 = const()[name = tensor("op_2795"), val = tensor([1, 32, 128, 64])]; tensor key_states_95_cast = reshape(shape = var_2795, x = hidden_states_463_cast)[name = tensor("key_states_95_cast")]; tensor var_2804_axes_0 = const()[name = tensor("op_2804_axes_0"), val = tensor([2])]; tensor transpose_32 = transpose(perm = hidden_states_465_perm_0, x = var_2722_cast)[name = tensor("transpose_32")]; tensor var_2804_cast = expand_dims(axes = var_2804_axes_0, x = transpose_32)[name = tensor("op_2804_cast")]; tensor hidden_states_467_reps_0 = const()[name = tensor("hidden_states_467_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_467_cast = tile(reps = hidden_states_467_reps_0, x = var_2804_cast)[name = tensor("hidden_states_467_cast")]; tensor var_2812 = const()[name = tensor("op_2812"), val = tensor([1, 32, 128, 64])]; tensor value_states_63_cast = reshape(shape = var_2812, x = hidden_states_467_cast)[name = tensor("value_states_63_cast")]; tensor var_2814_perm_0 = const()[name = tensor("op_2814_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2815_transpose_x_0 = const()[name = tensor("op_2815_transpose_x_0"), val = tensor(false)]; tensor var_2815_transpose_y_0 = const()[name = tensor("op_2815_transpose_y_0"), val = tensor(false)]; tensor transpose_31 = transpose(perm = var_2814_perm_0, x = key_states_95_cast)[name = tensor("transpose_31")]; tensor var_2815_cast = matmul(transpose_x = var_2815_transpose_x_0, transpose_y = var_2815_transpose_y_0, x = query_states_63_cast, y = transpose_31)[name = tensor("op_2815_cast")]; tensor _inversed_attn_weights_61_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_61_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_61_cast = mul(x = var_2815_cast, y = _inversed_attn_weights_61_y_0_to_fp16)[name = tensor("_inversed_attn_weights_61_cast")]; tensor input_151_cast = add(x = _inversed_attn_weights_61_cast, y = attention_mask_cast)[name = tensor("input_151_cast")]; tensor var_2819_cast = softmax(axis = var_10, x = input_151_cast)[name = tensor("op_2819_cast")]; tensor attn_output_61_transpose_x_0 = const()[name = tensor("attn_output_61_transpose_x_0"), val = tensor(false)]; tensor attn_output_61_transpose_y_0 = const()[name = tensor("attn_output_61_transpose_y_0"), val = tensor(false)]; tensor attn_output_61_cast = matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = var_2819_cast, y = value_states_63_cast)[name = tensor("attn_output_61_cast")]; tensor var_2822_perm_0 = const()[name = tensor("op_2822_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2824 = const()[name = tensor("op_2824"), val = tensor([1, 128, 2048])]; tensor transpose_30 = transpose(perm = var_2822_perm_0, x = attn_output_61_cast)[name = tensor("transpose_30")]; tensor input_153_cast = reshape(shape = var_2824, x = transpose_30)[name = tensor("input_153_cast")]; tensor model_model_layers_15_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(365868608))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(367965824))), name = tensor("model_model_layers_15_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_108_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_to_fp16_palettized, x = input_153_cast)[name = tensor("linear_108_cast")]; tensor hidden_states_471_cast = add(x = hidden_states_451_cast, y = linear_108_cast)[name = tensor("hidden_states_471_cast")]; tensor var_13_promoted_to_fp16_31 = const()[name = tensor("op_13_promoted_to_fp16_31"), val = tensor(0x1p+1)]; tensor var_2831_cast = pow(x = hidden_states_471_cast, y = var_13_promoted_to_fp16_31)[name = tensor("op_2831_cast")]; tensor var_2832 = const()[name = tensor("op_2832"), val = tensor([-1])]; tensor variance_63_cast = reduce_mean(axes = var_2832, keep_dims = var_23, x = var_2831_cast)[name = tensor("variance_63_cast")]; tensor var_2834_to_fp16 = const()[name = tensor("op_2834_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_2835_cast = add(x = variance_63_cast, y = var_2834_to_fp16)[name = tensor("op_2835_cast")]; tensor var_2836_epsilon_0_to_fp16 = const()[name = tensor("op_2836_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_2836_cast = rsqrt(epsilon = var_2836_epsilon_0_to_fp16, x = var_2835_cast)[name = tensor("op_2836_cast")]; tensor hidden_states_475_cast = mul(x = hidden_states_471_cast, y = var_2836_cast)[name = tensor("hidden_states_475_cast")]; tensor model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(367965952)))]; tensor input_155_cast = mul(x = model_model_layers_15_post_attention_layernorm_weight_to_fp16, y = hidden_states_475_cast)[name = tensor("input_155_cast")]; tensor model_model_layers_15_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(367970112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373737344))), name = tensor("model_model_layers_15_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_109_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_15_mlp_gate_proj_weight_to_fp16_palettized, x = input_155_cast)[name = tensor("linear_109_cast")]; tensor var_2848_cast = silu(x = linear_109_cast)[name = tensor("op_2848_cast")]; tensor model_model_layers_15_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373737472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379504704))), name = tensor("model_model_layers_15_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_110_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_15_mlp_up_proj_weight_to_fp16_palettized, x = input_155_cast)[name = tensor("linear_110_cast")]; tensor input_159_cast = mul(x = var_2848_cast, y = linear_110_cast)[name = tensor("input_159_cast")]; tensor model_model_layers_15_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379504832))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385272064))), name = tensor("model_model_layers_15_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_111_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_mlp_down_proj_weight_to_fp16_palettized, x = input_159_cast)[name = tensor("linear_111_cast")]; tensor hidden_states_481_cast = add(x = hidden_states_471_cast, y = linear_111_cast)[name = tensor("hidden_states_481_cast")]; tensor var_13_promoted_to_fp16_32 = const()[name = tensor("op_13_promoted_to_fp16_32"), val = tensor(0x1p+1)]; tensor var_2861_cast = pow(x = hidden_states_481_cast, y = var_13_promoted_to_fp16_32)[name = tensor("op_2861_cast")]; tensor var_2862 = const()[name = tensor("op_2862"), val = tensor([-1])]; tensor variance_65_cast = reduce_mean(axes = var_2862, keep_dims = var_23, x = var_2861_cast)[name = tensor("variance_65_cast")]; tensor var_2864_to_fp16 = const()[name = tensor("op_2864_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_2865_cast = add(x = variance_65_cast, y = var_2864_to_fp16)[name = tensor("op_2865_cast")]; tensor var_2866_epsilon_0_to_fp16 = const()[name = tensor("op_2866_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_2866_cast = rsqrt(epsilon = var_2866_epsilon_0_to_fp16, x = var_2865_cast)[name = tensor("op_2866_cast")]; tensor hidden_states_485_cast = mul(x = hidden_states_481_cast, y = var_2866_cast)[name = tensor("hidden_states_485_cast")]; tensor model_model_layers_16_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385272192)))]; tensor hidden_states_489_cast = mul(x = model_model_layers_16_input_layernorm_weight_to_fp16, y = hidden_states_485_cast)[name = tensor("hidden_states_489_cast")]; tensor model_model_layers_16_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385276352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387373568))), name = tensor("model_model_layers_16_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_112_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_489_cast)[name = tensor("linear_112_cast")]; tensor model_model_layers_16_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387373696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387635904))), name = tensor("model_model_layers_16_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_113_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_16_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_489_cast)[name = tensor("linear_113_cast")]; tensor model_model_layers_16_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387636032))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387898240))), name = tensor("model_model_layers_16_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_114_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_16_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_489_cast)[name = tensor("linear_114_cast")]; tensor var_2886 = const()[name = tensor("op_2886"), val = tensor([1, 128, 32, 64])]; tensor var_2887_cast = reshape(shape = var_2886, x = linear_112_cast)[name = tensor("op_2887_cast")]; tensor q_33_perm_0 = const()[name = tensor("q_33_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2889 = const()[name = tensor("op_2889"), val = tensor([1, 128, 4, 64])]; tensor var_2890_cast = reshape(shape = var_2889, x = linear_113_cast)[name = tensor("op_2890_cast")]; tensor key_states_99_perm_0 = const()[name = tensor("key_states_99_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2892 = const()[name = tensor("op_2892"), val = tensor([1, 128, 4, 64])]; tensor var_2893_cast = reshape(shape = var_2892, x = linear_114_cast)[name = tensor("op_2893_cast")]; tensor hidden_states_495_perm_0 = const()[name = tensor("hidden_states_495_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_29 = transpose(perm = q_33_perm_0, x = var_2887_cast)[name = tensor("transpose_29")]; tensor var_2919_cast = mul(x = transpose_29, y = cos_1_to_fp16_palettized)[name = tensor("op_2919_cast")]; tensor x1_65_begin_0 = const()[name = tensor("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_65_end_0 = const()[name = tensor("x1_65_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_65_end_mask_0 = const()[name = tensor("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_65_cast = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = transpose_29)[name = tensor("x1_65_cast")]; tensor x2_65_begin_0 = const()[name = tensor("x2_65_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_65_end_0 = const()[name = tensor("x2_65_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_65_end_mask_0 = const()[name = tensor("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_65_cast = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = transpose_29)[name = tensor("x2_65_cast")]; tensor const_283_promoted_to_fp16 = const()[name = tensor("const_283_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_2930_cast = mul(x = x2_65_cast, y = const_283_promoted_to_fp16)[name = tensor("op_2930_cast")]; tensor var_2932_interleave_0 = const()[name = tensor("op_2932_interleave_0"), val = tensor(false)]; tensor var_2932_cast = concat(axis = var_10, interleave = var_2932_interleave_0, values = (var_2930_cast, x1_65_cast))[name = tensor("op_2932_cast")]; tensor var_2933_cast = mul(x = var_2932_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_2933_cast")]; tensor query_states_67_cast = add(x = var_2919_cast, y = var_2933_cast)[name = tensor("query_states_67_cast")]; tensor transpose_28 = transpose(perm = key_states_99_perm_0, x = var_2890_cast)[name = tensor("transpose_28")]; tensor var_2935_cast = mul(x = transpose_28, y = cos_1_to_fp16_palettized)[name = tensor("op_2935_cast")]; tensor x1_67_begin_0 = const()[name = tensor("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_67_end_0 = const()[name = tensor("x1_67_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_67_end_mask_0 = const()[name = tensor("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_67_cast = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = transpose_28)[name = tensor("x1_67_cast")]; tensor x2_67_begin_0 = const()[name = tensor("x2_67_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_67_end_0 = const()[name = tensor("x2_67_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_67_end_mask_0 = const()[name = tensor("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_67_cast = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = transpose_28)[name = tensor("x2_67_cast")]; tensor const_286_promoted_to_fp16 = const()[name = tensor("const_286_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_2946_cast = mul(x = x2_67_cast, y = const_286_promoted_to_fp16)[name = tensor("op_2946_cast")]; tensor var_2948_interleave_0 = const()[name = tensor("op_2948_interleave_0"), val = tensor(false)]; tensor var_2948_cast = concat(axis = var_10, interleave = var_2948_interleave_0, values = (var_2946_cast, x1_67_cast))[name = tensor("op_2948_cast")]; tensor var_2949_cast = mul(x = var_2948_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_2949_cast")]; tensor hidden_states_491_cast = add(x = var_2935_cast, y = var_2949_cast)[name = tensor("hidden_states_491_cast")]; tensor var_2958_axes_0 = const()[name = tensor("op_2958_axes_0"), val = tensor([2])]; tensor var_2958_cast = expand_dims(axes = var_2958_axes_0, x = hidden_states_491_cast)[name = tensor("op_2958_cast")]; tensor hidden_states_493_reps_0 = const()[name = tensor("hidden_states_493_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_493_cast = tile(reps = hidden_states_493_reps_0, x = var_2958_cast)[name = tensor("hidden_states_493_cast")]; tensor var_2966 = const()[name = tensor("op_2966"), val = tensor([1, 32, 128, 64])]; tensor key_states_101_cast = reshape(shape = var_2966, x = hidden_states_493_cast)[name = tensor("key_states_101_cast")]; tensor var_2975_axes_0 = const()[name = tensor("op_2975_axes_0"), val = tensor([2])]; tensor transpose_27 = transpose(perm = hidden_states_495_perm_0, x = var_2893_cast)[name = tensor("transpose_27")]; tensor var_2975_cast = expand_dims(axes = var_2975_axes_0, x = transpose_27)[name = tensor("op_2975_cast")]; tensor hidden_states_497_reps_0 = const()[name = tensor("hidden_states_497_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_497_cast = tile(reps = hidden_states_497_reps_0, x = var_2975_cast)[name = tensor("hidden_states_497_cast")]; tensor var_2983 = const()[name = tensor("op_2983"), val = tensor([1, 32, 128, 64])]; tensor value_states_67_cast = reshape(shape = var_2983, x = hidden_states_497_cast)[name = tensor("value_states_67_cast")]; tensor var_2985_perm_0 = const()[name = tensor("op_2985_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_2986_transpose_x_0 = const()[name = tensor("op_2986_transpose_x_0"), val = tensor(false)]; tensor var_2986_transpose_y_0 = const()[name = tensor("op_2986_transpose_y_0"), val = tensor(false)]; tensor transpose_26 = transpose(perm = var_2985_perm_0, x = key_states_101_cast)[name = tensor("transpose_26")]; tensor var_2986_cast = matmul(transpose_x = var_2986_transpose_x_0, transpose_y = var_2986_transpose_y_0, x = query_states_67_cast, y = transpose_26)[name = tensor("op_2986_cast")]; tensor _inversed_attn_weights_65_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_65_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_65_cast = mul(x = var_2986_cast, y = _inversed_attn_weights_65_y_0_to_fp16)[name = tensor("_inversed_attn_weights_65_cast")]; tensor input_161_cast = add(x = _inversed_attn_weights_65_cast, y = attention_mask_cast)[name = tensor("input_161_cast")]; tensor var_2990_cast = softmax(axis = var_10, x = input_161_cast)[name = tensor("op_2990_cast")]; tensor attn_output_65_transpose_x_0 = const()[name = tensor("attn_output_65_transpose_x_0"), val = tensor(false)]; tensor attn_output_65_transpose_y_0 = const()[name = tensor("attn_output_65_transpose_y_0"), val = tensor(false)]; tensor attn_output_65_cast = matmul(transpose_x = attn_output_65_transpose_x_0, transpose_y = attn_output_65_transpose_y_0, x = var_2990_cast, y = value_states_67_cast)[name = tensor("attn_output_65_cast")]; tensor var_2993_perm_0 = const()[name = tensor("op_2993_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_2995 = const()[name = tensor("op_2995"), val = tensor([1, 128, 2048])]; tensor transpose_25 = transpose(perm = var_2993_perm_0, x = attn_output_65_cast)[name = tensor("transpose_25")]; tensor input_163_cast = reshape(shape = var_2995, x = transpose_25)[name = tensor("input_163_cast")]; tensor model_model_layers_16_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387898368))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389995584))), name = tensor("model_model_layers_16_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_115_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_self_attn_o_proj_weight_to_fp16_palettized, x = input_163_cast)[name = tensor("linear_115_cast")]; tensor hidden_states_501_cast = add(x = hidden_states_481_cast, y = linear_115_cast)[name = tensor("hidden_states_501_cast")]; tensor var_13_promoted_to_fp16_33 = const()[name = tensor("op_13_promoted_to_fp16_33"), val = tensor(0x1p+1)]; tensor var_3002_cast = pow(x = hidden_states_501_cast, y = var_13_promoted_to_fp16_33)[name = tensor("op_3002_cast")]; tensor var_3003 = const()[name = tensor("op_3003"), val = tensor([-1])]; tensor variance_67_cast = reduce_mean(axes = var_3003, keep_dims = var_23, x = var_3002_cast)[name = tensor("variance_67_cast")]; tensor var_3005_to_fp16 = const()[name = tensor("op_3005_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_3006_cast = add(x = variance_67_cast, y = var_3005_to_fp16)[name = tensor("op_3006_cast")]; tensor var_3007_epsilon_0_to_fp16 = const()[name = tensor("op_3007_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_3007_cast = rsqrt(epsilon = var_3007_epsilon_0_to_fp16, x = var_3006_cast)[name = tensor("op_3007_cast")]; tensor hidden_states_505_cast = mul(x = hidden_states_501_cast, y = var_3007_cast)[name = tensor("hidden_states_505_cast")]; tensor model_model_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389995712)))]; tensor input_165_cast = mul(x = model_model_layers_16_post_attention_layernorm_weight_to_fp16, y = hidden_states_505_cast)[name = tensor("input_165_cast")]; tensor model_model_layers_16_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(389999872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(395767104))), name = tensor("model_model_layers_16_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_116_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_16_mlp_gate_proj_weight_to_fp16_palettized, x = input_165_cast)[name = tensor("linear_116_cast")]; tensor var_3019_cast = silu(x = linear_116_cast)[name = tensor("op_3019_cast")]; tensor model_model_layers_16_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(395767232))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(401534464))), name = tensor("model_model_layers_16_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_117_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_16_mlp_up_proj_weight_to_fp16_palettized, x = input_165_cast)[name = tensor("linear_117_cast")]; tensor input_169_cast = mul(x = var_3019_cast, y = linear_117_cast)[name = tensor("input_169_cast")]; tensor model_model_layers_16_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(401534592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407301824))), name = tensor("model_model_layers_16_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_118_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_mlp_down_proj_weight_to_fp16_palettized, x = input_169_cast)[name = tensor("linear_118_cast")]; tensor hidden_states_511_cast = add(x = hidden_states_501_cast, y = linear_118_cast)[name = tensor("hidden_states_511_cast")]; tensor var_13_promoted_to_fp16_34 = const()[name = tensor("op_13_promoted_to_fp16_34"), val = tensor(0x1p+1)]; tensor var_3032_cast = pow(x = hidden_states_511_cast, y = var_13_promoted_to_fp16_34)[name = tensor("op_3032_cast")]; tensor var_3033 = const()[name = tensor("op_3033"), val = tensor([-1])]; tensor variance_69_cast = reduce_mean(axes = var_3033, keep_dims = var_23, x = var_3032_cast)[name = tensor("variance_69_cast")]; tensor var_3035_to_fp16 = const()[name = tensor("op_3035_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_3036_cast = add(x = variance_69_cast, y = var_3035_to_fp16)[name = tensor("op_3036_cast")]; tensor var_3037_epsilon_0_to_fp16 = const()[name = tensor("op_3037_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_3037_cast = rsqrt(epsilon = var_3037_epsilon_0_to_fp16, x = var_3036_cast)[name = tensor("op_3037_cast")]; tensor hidden_states_515_cast = mul(x = hidden_states_511_cast, y = var_3037_cast)[name = tensor("hidden_states_515_cast")]; tensor model_model_layers_17_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407301952)))]; tensor hidden_states_519_cast = mul(x = model_model_layers_17_input_layernorm_weight_to_fp16, y = hidden_states_515_cast)[name = tensor("hidden_states_519_cast")]; tensor model_model_layers_17_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407306112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409403328))), name = tensor("model_model_layers_17_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_119_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_519_cast)[name = tensor("linear_119_cast")]; tensor model_model_layers_17_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409403456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409665664))), name = tensor("model_model_layers_17_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_120_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_17_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_519_cast)[name = tensor("linear_120_cast")]; tensor model_model_layers_17_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409665792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409928000))), name = tensor("model_model_layers_17_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_121_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_17_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_519_cast)[name = tensor("linear_121_cast")]; tensor var_3057 = const()[name = tensor("op_3057"), val = tensor([1, 128, 32, 64])]; tensor var_3058_cast = reshape(shape = var_3057, x = linear_119_cast)[name = tensor("op_3058_cast")]; tensor q_35_perm_0 = const()[name = tensor("q_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3060 = const()[name = tensor("op_3060"), val = tensor([1, 128, 4, 64])]; tensor var_3061_cast = reshape(shape = var_3060, x = linear_120_cast)[name = tensor("op_3061_cast")]; tensor key_states_105_perm_0 = const()[name = tensor("key_states_105_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3063 = const()[name = tensor("op_3063"), val = tensor([1, 128, 4, 64])]; tensor var_3064_cast = reshape(shape = var_3063, x = linear_121_cast)[name = tensor("op_3064_cast")]; tensor hidden_states_525_perm_0 = const()[name = tensor("hidden_states_525_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_24 = transpose(perm = q_35_perm_0, x = var_3058_cast)[name = tensor("transpose_24")]; tensor var_3090_cast = mul(x = transpose_24, y = cos_1_to_fp16_palettized)[name = tensor("op_3090_cast")]; tensor x1_69_begin_0 = const()[name = tensor("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_69_end_0 = const()[name = tensor("x1_69_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_69_end_mask_0 = const()[name = tensor("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_69_cast = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = transpose_24)[name = tensor("x1_69_cast")]; tensor x2_69_begin_0 = const()[name = tensor("x2_69_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_69_end_0 = const()[name = tensor("x2_69_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_69_end_mask_0 = const()[name = tensor("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_69_cast = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = transpose_24)[name = tensor("x2_69_cast")]; tensor const_300_promoted_to_fp16 = const()[name = tensor("const_300_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_3101_cast = mul(x = x2_69_cast, y = const_300_promoted_to_fp16)[name = tensor("op_3101_cast")]; tensor var_3103_interleave_0 = const()[name = tensor("op_3103_interleave_0"), val = tensor(false)]; tensor var_3103_cast = concat(axis = var_10, interleave = var_3103_interleave_0, values = (var_3101_cast, x1_69_cast))[name = tensor("op_3103_cast")]; tensor var_3104_cast = mul(x = var_3103_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_3104_cast")]; tensor query_states_71_cast = add(x = var_3090_cast, y = var_3104_cast)[name = tensor("query_states_71_cast")]; tensor transpose_23 = transpose(perm = key_states_105_perm_0, x = var_3061_cast)[name = tensor("transpose_23")]; tensor var_3106_cast = mul(x = transpose_23, y = cos_1_to_fp16_palettized)[name = tensor("op_3106_cast")]; tensor x1_71_begin_0 = const()[name = tensor("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_71_end_0 = const()[name = tensor("x1_71_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_71_end_mask_0 = const()[name = tensor("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_71_cast = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = transpose_23)[name = tensor("x1_71_cast")]; tensor x2_71_begin_0 = const()[name = tensor("x2_71_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_71_end_0 = const()[name = tensor("x2_71_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_71_end_mask_0 = const()[name = tensor("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_71_cast = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = transpose_23)[name = tensor("x2_71_cast")]; tensor const_303_promoted_to_fp16 = const()[name = tensor("const_303_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_3117_cast = mul(x = x2_71_cast, y = const_303_promoted_to_fp16)[name = tensor("op_3117_cast")]; tensor var_3119_interleave_0 = const()[name = tensor("op_3119_interleave_0"), val = tensor(false)]; tensor var_3119_cast = concat(axis = var_10, interleave = var_3119_interleave_0, values = (var_3117_cast, x1_71_cast))[name = tensor("op_3119_cast")]; tensor var_3120_cast = mul(x = var_3119_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_3120_cast")]; tensor hidden_states_521_cast = add(x = var_3106_cast, y = var_3120_cast)[name = tensor("hidden_states_521_cast")]; tensor var_3129_axes_0 = const()[name = tensor("op_3129_axes_0"), val = tensor([2])]; tensor var_3129_cast = expand_dims(axes = var_3129_axes_0, x = hidden_states_521_cast)[name = tensor("op_3129_cast")]; tensor hidden_states_523_reps_0 = const()[name = tensor("hidden_states_523_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_523_cast = tile(reps = hidden_states_523_reps_0, x = var_3129_cast)[name = tensor("hidden_states_523_cast")]; tensor var_3137 = const()[name = tensor("op_3137"), val = tensor([1, 32, 128, 64])]; tensor key_states_107_cast = reshape(shape = var_3137, x = hidden_states_523_cast)[name = tensor("key_states_107_cast")]; tensor var_3146_axes_0 = const()[name = tensor("op_3146_axes_0"), val = tensor([2])]; tensor transpose_22 = transpose(perm = hidden_states_525_perm_0, x = var_3064_cast)[name = tensor("transpose_22")]; tensor var_3146_cast = expand_dims(axes = var_3146_axes_0, x = transpose_22)[name = tensor("op_3146_cast")]; tensor hidden_states_527_reps_0 = const()[name = tensor("hidden_states_527_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_527_cast = tile(reps = hidden_states_527_reps_0, x = var_3146_cast)[name = tensor("hidden_states_527_cast")]; tensor var_3154 = const()[name = tensor("op_3154"), val = tensor([1, 32, 128, 64])]; tensor value_states_71_cast = reshape(shape = var_3154, x = hidden_states_527_cast)[name = tensor("value_states_71_cast")]; tensor var_3156_perm_0 = const()[name = tensor("op_3156_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3157_transpose_x_0 = const()[name = tensor("op_3157_transpose_x_0"), val = tensor(false)]; tensor var_3157_transpose_y_0 = const()[name = tensor("op_3157_transpose_y_0"), val = tensor(false)]; tensor transpose_21 = transpose(perm = var_3156_perm_0, x = key_states_107_cast)[name = tensor("transpose_21")]; tensor var_3157_cast = matmul(transpose_x = var_3157_transpose_x_0, transpose_y = var_3157_transpose_y_0, x = query_states_71_cast, y = transpose_21)[name = tensor("op_3157_cast")]; tensor _inversed_attn_weights_69_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_69_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_69_cast = mul(x = var_3157_cast, y = _inversed_attn_weights_69_y_0_to_fp16)[name = tensor("_inversed_attn_weights_69_cast")]; tensor input_171_cast = add(x = _inversed_attn_weights_69_cast, y = attention_mask_cast)[name = tensor("input_171_cast")]; tensor var_3161_cast = softmax(axis = var_10, x = input_171_cast)[name = tensor("op_3161_cast")]; tensor attn_output_69_transpose_x_0 = const()[name = tensor("attn_output_69_transpose_x_0"), val = tensor(false)]; tensor attn_output_69_transpose_y_0 = const()[name = tensor("attn_output_69_transpose_y_0"), val = tensor(false)]; tensor attn_output_69_cast = matmul(transpose_x = attn_output_69_transpose_x_0, transpose_y = attn_output_69_transpose_y_0, x = var_3161_cast, y = value_states_71_cast)[name = tensor("attn_output_69_cast")]; tensor var_3164_perm_0 = const()[name = tensor("op_3164_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3166 = const()[name = tensor("op_3166"), val = tensor([1, 128, 2048])]; tensor transpose_20 = transpose(perm = var_3164_perm_0, x = attn_output_69_cast)[name = tensor("transpose_20")]; tensor input_173_cast = reshape(shape = var_3166, x = transpose_20)[name = tensor("input_173_cast")]; tensor model_model_layers_17_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409928128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412025344))), name = tensor("model_model_layers_17_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_122_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_self_attn_o_proj_weight_to_fp16_palettized, x = input_173_cast)[name = tensor("linear_122_cast")]; tensor hidden_states_531_cast = add(x = hidden_states_511_cast, y = linear_122_cast)[name = tensor("hidden_states_531_cast")]; tensor var_13_promoted_to_fp16_35 = const()[name = tensor("op_13_promoted_to_fp16_35"), val = tensor(0x1p+1)]; tensor var_3173_cast = pow(x = hidden_states_531_cast, y = var_13_promoted_to_fp16_35)[name = tensor("op_3173_cast")]; tensor var_3174 = const()[name = tensor("op_3174"), val = tensor([-1])]; tensor variance_71_cast = reduce_mean(axes = var_3174, keep_dims = var_23, x = var_3173_cast)[name = tensor("variance_71_cast")]; tensor var_3176_to_fp16 = const()[name = tensor("op_3176_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_3177_cast = add(x = variance_71_cast, y = var_3176_to_fp16)[name = tensor("op_3177_cast")]; tensor var_3178_epsilon_0_to_fp16 = const()[name = tensor("op_3178_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_3178_cast = rsqrt(epsilon = var_3178_epsilon_0_to_fp16, x = var_3177_cast)[name = tensor("op_3178_cast")]; tensor hidden_states_535_cast = mul(x = hidden_states_531_cast, y = var_3178_cast)[name = tensor("hidden_states_535_cast")]; tensor model_model_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412025472)))]; tensor input_175_cast = mul(x = model_model_layers_17_post_attention_layernorm_weight_to_fp16, y = hidden_states_535_cast)[name = tensor("input_175_cast")]; tensor model_model_layers_17_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412029632))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(417796864))), name = tensor("model_model_layers_17_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_123_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_17_mlp_gate_proj_weight_to_fp16_palettized, x = input_175_cast)[name = tensor("linear_123_cast")]; tensor var_3190_cast = silu(x = linear_123_cast)[name = tensor("op_3190_cast")]; tensor model_model_layers_17_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(417796992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(423564224))), name = tensor("model_model_layers_17_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_124_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_17_mlp_up_proj_weight_to_fp16_palettized, x = input_175_cast)[name = tensor("linear_124_cast")]; tensor input_179_cast = mul(x = var_3190_cast, y = linear_124_cast)[name = tensor("input_179_cast")]; tensor model_model_layers_17_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(423564352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(429331584))), name = tensor("model_model_layers_17_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_125_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_mlp_down_proj_weight_to_fp16_palettized, x = input_179_cast)[name = tensor("linear_125_cast")]; tensor hidden_states_541_cast = add(x = hidden_states_531_cast, y = linear_125_cast)[name = tensor("hidden_states_541_cast")]; tensor var_13_promoted_to_fp16_36 = const()[name = tensor("op_13_promoted_to_fp16_36"), val = tensor(0x1p+1)]; tensor var_3203_cast = pow(x = hidden_states_541_cast, y = var_13_promoted_to_fp16_36)[name = tensor("op_3203_cast")]; tensor var_3204 = const()[name = tensor("op_3204"), val = tensor([-1])]; tensor variance_73_cast = reduce_mean(axes = var_3204, keep_dims = var_23, x = var_3203_cast)[name = tensor("variance_73_cast")]; tensor var_3206_to_fp16 = const()[name = tensor("op_3206_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_3207_cast = add(x = variance_73_cast, y = var_3206_to_fp16)[name = tensor("op_3207_cast")]; tensor var_3208_epsilon_0_to_fp16 = const()[name = tensor("op_3208_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_3208_cast = rsqrt(epsilon = var_3208_epsilon_0_to_fp16, x = var_3207_cast)[name = tensor("op_3208_cast")]; tensor hidden_states_545_cast = mul(x = hidden_states_541_cast, y = var_3208_cast)[name = tensor("hidden_states_545_cast")]; tensor model_model_layers_18_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(429331712)))]; tensor hidden_states_549_cast = mul(x = model_model_layers_18_input_layernorm_weight_to_fp16, y = hidden_states_545_cast)[name = tensor("hidden_states_549_cast")]; tensor model_model_layers_18_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(429335872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431433088))), name = tensor("model_model_layers_18_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_126_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_549_cast)[name = tensor("linear_126_cast")]; tensor model_model_layers_18_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431433216))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431695424))), name = tensor("model_model_layers_18_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_127_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_18_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_549_cast)[name = tensor("linear_127_cast")]; tensor model_model_layers_18_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431695552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431957760))), name = tensor("model_model_layers_18_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_128_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_18_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_549_cast)[name = tensor("linear_128_cast")]; tensor var_3228 = const()[name = tensor("op_3228"), val = tensor([1, 128, 32, 64])]; tensor var_3229_cast = reshape(shape = var_3228, x = linear_126_cast)[name = tensor("op_3229_cast")]; tensor q_37_perm_0 = const()[name = tensor("q_37_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3231 = const()[name = tensor("op_3231"), val = tensor([1, 128, 4, 64])]; tensor var_3232_cast = reshape(shape = var_3231, x = linear_127_cast)[name = tensor("op_3232_cast")]; tensor key_states_111_perm_0 = const()[name = tensor("key_states_111_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3234 = const()[name = tensor("op_3234"), val = tensor([1, 128, 4, 64])]; tensor var_3235_cast = reshape(shape = var_3234, x = linear_128_cast)[name = tensor("op_3235_cast")]; tensor hidden_states_555_perm_0 = const()[name = tensor("hidden_states_555_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_19 = transpose(perm = q_37_perm_0, x = var_3229_cast)[name = tensor("transpose_19")]; tensor var_3261_cast = mul(x = transpose_19, y = cos_1_to_fp16_palettized)[name = tensor("op_3261_cast")]; tensor x1_73_begin_0 = const()[name = tensor("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_73_end_0 = const()[name = tensor("x1_73_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_73_end_mask_0 = const()[name = tensor("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_73_cast = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = transpose_19)[name = tensor("x1_73_cast")]; tensor x2_73_begin_0 = const()[name = tensor("x2_73_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_73_end_0 = const()[name = tensor("x2_73_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_73_end_mask_0 = const()[name = tensor("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_73_cast = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = transpose_19)[name = tensor("x2_73_cast")]; tensor const_317_promoted_to_fp16 = const()[name = tensor("const_317_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_3272_cast = mul(x = x2_73_cast, y = const_317_promoted_to_fp16)[name = tensor("op_3272_cast")]; tensor var_3274_interleave_0 = const()[name = tensor("op_3274_interleave_0"), val = tensor(false)]; tensor var_3274_cast = concat(axis = var_10, interleave = var_3274_interleave_0, values = (var_3272_cast, x1_73_cast))[name = tensor("op_3274_cast")]; tensor var_3275_cast = mul(x = var_3274_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_3275_cast")]; tensor query_states_75_cast = add(x = var_3261_cast, y = var_3275_cast)[name = tensor("query_states_75_cast")]; tensor transpose_18 = transpose(perm = key_states_111_perm_0, x = var_3232_cast)[name = tensor("transpose_18")]; tensor var_3277_cast = mul(x = transpose_18, y = cos_1_to_fp16_palettized)[name = tensor("op_3277_cast")]; tensor x1_75_begin_0 = const()[name = tensor("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_75_end_0 = const()[name = tensor("x1_75_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_75_end_mask_0 = const()[name = tensor("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_75_cast = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = transpose_18)[name = tensor("x1_75_cast")]; tensor x2_75_begin_0 = const()[name = tensor("x2_75_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_75_end_0 = const()[name = tensor("x2_75_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_75_end_mask_0 = const()[name = tensor("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_75_cast = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = transpose_18)[name = tensor("x2_75_cast")]; tensor const_320_promoted_to_fp16 = const()[name = tensor("const_320_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_3288_cast = mul(x = x2_75_cast, y = const_320_promoted_to_fp16)[name = tensor("op_3288_cast")]; tensor var_3290_interleave_0 = const()[name = tensor("op_3290_interleave_0"), val = tensor(false)]; tensor var_3290_cast = concat(axis = var_10, interleave = var_3290_interleave_0, values = (var_3288_cast, x1_75_cast))[name = tensor("op_3290_cast")]; tensor var_3291_cast = mul(x = var_3290_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_3291_cast")]; tensor hidden_states_551_cast = add(x = var_3277_cast, y = var_3291_cast)[name = tensor("hidden_states_551_cast")]; tensor var_3300_axes_0 = const()[name = tensor("op_3300_axes_0"), val = tensor([2])]; tensor var_3300_cast = expand_dims(axes = var_3300_axes_0, x = hidden_states_551_cast)[name = tensor("op_3300_cast")]; tensor hidden_states_553_reps_0 = const()[name = tensor("hidden_states_553_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_553_cast = tile(reps = hidden_states_553_reps_0, x = var_3300_cast)[name = tensor("hidden_states_553_cast")]; tensor var_3308 = const()[name = tensor("op_3308"), val = tensor([1, 32, 128, 64])]; tensor key_states_113_cast = reshape(shape = var_3308, x = hidden_states_553_cast)[name = tensor("key_states_113_cast")]; tensor var_3317_axes_0 = const()[name = tensor("op_3317_axes_0"), val = tensor([2])]; tensor transpose_17 = transpose(perm = hidden_states_555_perm_0, x = var_3235_cast)[name = tensor("transpose_17")]; tensor var_3317_cast = expand_dims(axes = var_3317_axes_0, x = transpose_17)[name = tensor("op_3317_cast")]; tensor hidden_states_557_reps_0 = const()[name = tensor("hidden_states_557_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_557_cast = tile(reps = hidden_states_557_reps_0, x = var_3317_cast)[name = tensor("hidden_states_557_cast")]; tensor var_3325 = const()[name = tensor("op_3325"), val = tensor([1, 32, 128, 64])]; tensor value_states_75_cast = reshape(shape = var_3325, x = hidden_states_557_cast)[name = tensor("value_states_75_cast")]; tensor var_3327_perm_0 = const()[name = tensor("op_3327_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3328_transpose_x_0 = const()[name = tensor("op_3328_transpose_x_0"), val = tensor(false)]; tensor var_3328_transpose_y_0 = const()[name = tensor("op_3328_transpose_y_0"), val = tensor(false)]; tensor transpose_16 = transpose(perm = var_3327_perm_0, x = key_states_113_cast)[name = tensor("transpose_16")]; tensor var_3328_cast = matmul(transpose_x = var_3328_transpose_x_0, transpose_y = var_3328_transpose_y_0, x = query_states_75_cast, y = transpose_16)[name = tensor("op_3328_cast")]; tensor _inversed_attn_weights_73_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_73_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_73_cast = mul(x = var_3328_cast, y = _inversed_attn_weights_73_y_0_to_fp16)[name = tensor("_inversed_attn_weights_73_cast")]; tensor input_181_cast = add(x = _inversed_attn_weights_73_cast, y = attention_mask_cast)[name = tensor("input_181_cast")]; tensor var_3332_cast = softmax(axis = var_10, x = input_181_cast)[name = tensor("op_3332_cast")]; tensor attn_output_73_transpose_x_0 = const()[name = tensor("attn_output_73_transpose_x_0"), val = tensor(false)]; tensor attn_output_73_transpose_y_0 = const()[name = tensor("attn_output_73_transpose_y_0"), val = tensor(false)]; tensor attn_output_73_cast = matmul(transpose_x = attn_output_73_transpose_x_0, transpose_y = attn_output_73_transpose_y_0, x = var_3332_cast, y = value_states_75_cast)[name = tensor("attn_output_73_cast")]; tensor var_3335_perm_0 = const()[name = tensor("op_3335_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3337 = const()[name = tensor("op_3337"), val = tensor([1, 128, 2048])]; tensor transpose_15 = transpose(perm = var_3335_perm_0, x = attn_output_73_cast)[name = tensor("transpose_15")]; tensor input_183_cast = reshape(shape = var_3337, x = transpose_15)[name = tensor("input_183_cast")]; tensor model_model_layers_18_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(431957888))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434055104))), name = tensor("model_model_layers_18_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_129_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_self_attn_o_proj_weight_to_fp16_palettized, x = input_183_cast)[name = tensor("linear_129_cast")]; tensor hidden_states_561_cast = add(x = hidden_states_541_cast, y = linear_129_cast)[name = tensor("hidden_states_561_cast")]; tensor var_13_promoted_to_fp16_37 = const()[name = tensor("op_13_promoted_to_fp16_37"), val = tensor(0x1p+1)]; tensor var_3344_cast = pow(x = hidden_states_561_cast, y = var_13_promoted_to_fp16_37)[name = tensor("op_3344_cast")]; tensor var_3345 = const()[name = tensor("op_3345"), val = tensor([-1])]; tensor variance_75_cast = reduce_mean(axes = var_3345, keep_dims = var_23, x = var_3344_cast)[name = tensor("variance_75_cast")]; tensor var_3347_to_fp16 = const()[name = tensor("op_3347_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_3348_cast = add(x = variance_75_cast, y = var_3347_to_fp16)[name = tensor("op_3348_cast")]; tensor var_3349_epsilon_0_to_fp16 = const()[name = tensor("op_3349_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_3349_cast = rsqrt(epsilon = var_3349_epsilon_0_to_fp16, x = var_3348_cast)[name = tensor("op_3349_cast")]; tensor hidden_states_565_cast = mul(x = hidden_states_561_cast, y = var_3349_cast)[name = tensor("hidden_states_565_cast")]; tensor model_model_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434055232)))]; tensor input_185_cast = mul(x = model_model_layers_18_post_attention_layernorm_weight_to_fp16, y = hidden_states_565_cast)[name = tensor("input_185_cast")]; tensor model_model_layers_18_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434059392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(439826624))), name = tensor("model_model_layers_18_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_130_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_18_mlp_gate_proj_weight_to_fp16_palettized, x = input_185_cast)[name = tensor("linear_130_cast")]; tensor var_3361_cast = silu(x = linear_130_cast)[name = tensor("op_3361_cast")]; tensor model_model_layers_18_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(439826752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(445593984))), name = tensor("model_model_layers_18_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_131_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_18_mlp_up_proj_weight_to_fp16_palettized, x = input_185_cast)[name = tensor("linear_131_cast")]; tensor input_189_cast = mul(x = var_3361_cast, y = linear_131_cast)[name = tensor("input_189_cast")]; tensor model_model_layers_18_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(445594112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(451361344))), name = tensor("model_model_layers_18_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_132_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_mlp_down_proj_weight_to_fp16_palettized, x = input_189_cast)[name = tensor("linear_132_cast")]; tensor hidden_states_571_cast = add(x = hidden_states_561_cast, y = linear_132_cast)[name = tensor("hidden_states_571_cast")]; tensor var_13_promoted_to_fp16_38 = const()[name = tensor("op_13_promoted_to_fp16_38"), val = tensor(0x1p+1)]; tensor var_3374_cast = pow(x = hidden_states_571_cast, y = var_13_promoted_to_fp16_38)[name = tensor("op_3374_cast")]; tensor var_3375 = const()[name = tensor("op_3375"), val = tensor([-1])]; tensor variance_77_cast = reduce_mean(axes = var_3375, keep_dims = var_23, x = var_3374_cast)[name = tensor("variance_77_cast")]; tensor var_3377_to_fp16 = const()[name = tensor("op_3377_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_3378_cast = add(x = variance_77_cast, y = var_3377_to_fp16)[name = tensor("op_3378_cast")]; tensor var_3379_epsilon_0_to_fp16 = const()[name = tensor("op_3379_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_3379_cast = rsqrt(epsilon = var_3379_epsilon_0_to_fp16, x = var_3378_cast)[name = tensor("op_3379_cast")]; tensor hidden_states_575_cast = mul(x = hidden_states_571_cast, y = var_3379_cast)[name = tensor("hidden_states_575_cast")]; tensor model_model_layers_19_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(451361472)))]; tensor hidden_states_579_cast = mul(x = model_model_layers_19_input_layernorm_weight_to_fp16, y = hidden_states_575_cast)[name = tensor("hidden_states_579_cast")]; tensor model_model_layers_19_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(451365632))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(453462848))), name = tensor("model_model_layers_19_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_133_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_579_cast)[name = tensor("linear_133_cast")]; tensor model_model_layers_19_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(453462976))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(453725184))), name = tensor("model_model_layers_19_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_134_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_19_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_579_cast)[name = tensor("linear_134_cast")]; tensor model_model_layers_19_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(453725312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(453987520))), name = tensor("model_model_layers_19_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_135_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_19_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_579_cast)[name = tensor("linear_135_cast")]; tensor var_3399 = const()[name = tensor("op_3399"), val = tensor([1, 128, 32, 64])]; tensor var_3400_cast = reshape(shape = var_3399, x = linear_133_cast)[name = tensor("op_3400_cast")]; tensor q_39_perm_0 = const()[name = tensor("q_39_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3402 = const()[name = tensor("op_3402"), val = tensor([1, 128, 4, 64])]; tensor var_3403_cast = reshape(shape = var_3402, x = linear_134_cast)[name = tensor("op_3403_cast")]; tensor key_states_117_perm_0 = const()[name = tensor("key_states_117_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3405 = const()[name = tensor("op_3405"), val = tensor([1, 128, 4, 64])]; tensor var_3406_cast = reshape(shape = var_3405, x = linear_135_cast)[name = tensor("op_3406_cast")]; tensor hidden_states_585_perm_0 = const()[name = tensor("hidden_states_585_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_14 = transpose(perm = q_39_perm_0, x = var_3400_cast)[name = tensor("transpose_14")]; tensor var_3432_cast = mul(x = transpose_14, y = cos_1_to_fp16_palettized)[name = tensor("op_3432_cast")]; tensor x1_77_begin_0 = const()[name = tensor("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_77_end_0 = const()[name = tensor("x1_77_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_77_end_mask_0 = const()[name = tensor("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_77_cast = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = transpose_14)[name = tensor("x1_77_cast")]; tensor x2_77_begin_0 = const()[name = tensor("x2_77_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_77_end_0 = const()[name = tensor("x2_77_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_77_end_mask_0 = const()[name = tensor("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_77_cast = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = transpose_14)[name = tensor("x2_77_cast")]; tensor const_334_promoted_to_fp16 = const()[name = tensor("const_334_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_3443_cast = mul(x = x2_77_cast, y = const_334_promoted_to_fp16)[name = tensor("op_3443_cast")]; tensor var_3445_interleave_0 = const()[name = tensor("op_3445_interleave_0"), val = tensor(false)]; tensor var_3445_cast = concat(axis = var_10, interleave = var_3445_interleave_0, values = (var_3443_cast, x1_77_cast))[name = tensor("op_3445_cast")]; tensor var_3446_cast = mul(x = var_3445_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_3446_cast")]; tensor query_states_79_cast = add(x = var_3432_cast, y = var_3446_cast)[name = tensor("query_states_79_cast")]; tensor transpose_13 = transpose(perm = key_states_117_perm_0, x = var_3403_cast)[name = tensor("transpose_13")]; tensor var_3448_cast = mul(x = transpose_13, y = cos_1_to_fp16_palettized)[name = tensor("op_3448_cast")]; tensor x1_79_begin_0 = const()[name = tensor("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_79_end_0 = const()[name = tensor("x1_79_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_79_end_mask_0 = const()[name = tensor("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_79_cast = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = transpose_13)[name = tensor("x1_79_cast")]; tensor x2_79_begin_0 = const()[name = tensor("x2_79_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_79_end_0 = const()[name = tensor("x2_79_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_79_end_mask_0 = const()[name = tensor("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_79_cast = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = transpose_13)[name = tensor("x2_79_cast")]; tensor const_337_promoted_to_fp16 = const()[name = tensor("const_337_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_3459_cast = mul(x = x2_79_cast, y = const_337_promoted_to_fp16)[name = tensor("op_3459_cast")]; tensor var_3461_interleave_0 = const()[name = tensor("op_3461_interleave_0"), val = tensor(false)]; tensor var_3461_cast = concat(axis = var_10, interleave = var_3461_interleave_0, values = (var_3459_cast, x1_79_cast))[name = tensor("op_3461_cast")]; tensor var_3462_cast = mul(x = var_3461_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_3462_cast")]; tensor hidden_states_581_cast = add(x = var_3448_cast, y = var_3462_cast)[name = tensor("hidden_states_581_cast")]; tensor var_3471_axes_0 = const()[name = tensor("op_3471_axes_0"), val = tensor([2])]; tensor var_3471_cast = expand_dims(axes = var_3471_axes_0, x = hidden_states_581_cast)[name = tensor("op_3471_cast")]; tensor hidden_states_583_reps_0 = const()[name = tensor("hidden_states_583_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_583_cast = tile(reps = hidden_states_583_reps_0, x = var_3471_cast)[name = tensor("hidden_states_583_cast")]; tensor var_3479 = const()[name = tensor("op_3479"), val = tensor([1, 32, 128, 64])]; tensor key_states_119_cast = reshape(shape = var_3479, x = hidden_states_583_cast)[name = tensor("key_states_119_cast")]; tensor var_3488_axes_0 = const()[name = tensor("op_3488_axes_0"), val = tensor([2])]; tensor transpose_12 = transpose(perm = hidden_states_585_perm_0, x = var_3406_cast)[name = tensor("transpose_12")]; tensor var_3488_cast = expand_dims(axes = var_3488_axes_0, x = transpose_12)[name = tensor("op_3488_cast")]; tensor hidden_states_587_reps_0 = const()[name = tensor("hidden_states_587_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_587_cast = tile(reps = hidden_states_587_reps_0, x = var_3488_cast)[name = tensor("hidden_states_587_cast")]; tensor var_3496 = const()[name = tensor("op_3496"), val = tensor([1, 32, 128, 64])]; tensor value_states_79_cast = reshape(shape = var_3496, x = hidden_states_587_cast)[name = tensor("value_states_79_cast")]; tensor var_3498_perm_0 = const()[name = tensor("op_3498_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3499_transpose_x_0 = const()[name = tensor("op_3499_transpose_x_0"), val = tensor(false)]; tensor var_3499_transpose_y_0 = const()[name = tensor("op_3499_transpose_y_0"), val = tensor(false)]; tensor transpose_11 = transpose(perm = var_3498_perm_0, x = key_states_119_cast)[name = tensor("transpose_11")]; tensor var_3499_cast = matmul(transpose_x = var_3499_transpose_x_0, transpose_y = var_3499_transpose_y_0, x = query_states_79_cast, y = transpose_11)[name = tensor("op_3499_cast")]; tensor _inversed_attn_weights_77_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_77_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_77_cast = mul(x = var_3499_cast, y = _inversed_attn_weights_77_y_0_to_fp16)[name = tensor("_inversed_attn_weights_77_cast")]; tensor input_191_cast = add(x = _inversed_attn_weights_77_cast, y = attention_mask_cast)[name = tensor("input_191_cast")]; tensor var_3503_cast = softmax(axis = var_10, x = input_191_cast)[name = tensor("op_3503_cast")]; tensor attn_output_77_transpose_x_0 = const()[name = tensor("attn_output_77_transpose_x_0"), val = tensor(false)]; tensor attn_output_77_transpose_y_0 = const()[name = tensor("attn_output_77_transpose_y_0"), val = tensor(false)]; tensor attn_output_77_cast = matmul(transpose_x = attn_output_77_transpose_x_0, transpose_y = attn_output_77_transpose_y_0, x = var_3503_cast, y = value_states_79_cast)[name = tensor("attn_output_77_cast")]; tensor var_3506_perm_0 = const()[name = tensor("op_3506_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3508 = const()[name = tensor("op_3508"), val = tensor([1, 128, 2048])]; tensor transpose_10 = transpose(perm = var_3506_perm_0, x = attn_output_77_cast)[name = tensor("transpose_10")]; tensor input_193_cast = reshape(shape = var_3508, x = transpose_10)[name = tensor("input_193_cast")]; tensor model_model_layers_19_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(453987648))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(456084864))), name = tensor("model_model_layers_19_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_136_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_self_attn_o_proj_weight_to_fp16_palettized, x = input_193_cast)[name = tensor("linear_136_cast")]; tensor hidden_states_591_cast = add(x = hidden_states_571_cast, y = linear_136_cast)[name = tensor("hidden_states_591_cast")]; tensor var_13_promoted_to_fp16_39 = const()[name = tensor("op_13_promoted_to_fp16_39"), val = tensor(0x1p+1)]; tensor var_3515_cast = pow(x = hidden_states_591_cast, y = var_13_promoted_to_fp16_39)[name = tensor("op_3515_cast")]; tensor var_3516 = const()[name = tensor("op_3516"), val = tensor([-1])]; tensor variance_79_cast = reduce_mean(axes = var_3516, keep_dims = var_23, x = var_3515_cast)[name = tensor("variance_79_cast")]; tensor var_3518_to_fp16 = const()[name = tensor("op_3518_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_3519_cast = add(x = variance_79_cast, y = var_3518_to_fp16)[name = tensor("op_3519_cast")]; tensor var_3520_epsilon_0_to_fp16 = const()[name = tensor("op_3520_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_3520_cast = rsqrt(epsilon = var_3520_epsilon_0_to_fp16, x = var_3519_cast)[name = tensor("op_3520_cast")]; tensor hidden_states_595_cast = mul(x = hidden_states_591_cast, y = var_3520_cast)[name = tensor("hidden_states_595_cast")]; tensor model_model_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(456084992)))]; tensor input_195_cast = mul(x = model_model_layers_19_post_attention_layernorm_weight_to_fp16, y = hidden_states_595_cast)[name = tensor("input_195_cast")]; tensor model_model_layers_19_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(456089152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(461856384))), name = tensor("model_model_layers_19_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_137_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_19_mlp_gate_proj_weight_to_fp16_palettized, x = input_195_cast)[name = tensor("linear_137_cast")]; tensor var_3532_cast = silu(x = linear_137_cast)[name = tensor("op_3532_cast")]; tensor model_model_layers_19_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(461856512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(467623744))), name = tensor("model_model_layers_19_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_138_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_19_mlp_up_proj_weight_to_fp16_palettized, x = input_195_cast)[name = tensor("linear_138_cast")]; tensor input_199_cast = mul(x = var_3532_cast, y = linear_138_cast)[name = tensor("input_199_cast")]; tensor model_model_layers_19_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(467623872))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(473391104))), name = tensor("model_model_layers_19_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_139_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_mlp_down_proj_weight_to_fp16_palettized, x = input_199_cast)[name = tensor("linear_139_cast")]; tensor hidden_states_601_cast = add(x = hidden_states_591_cast, y = linear_139_cast)[name = tensor("hidden_states_601_cast")]; tensor var_13_promoted_to_fp16_40 = const()[name = tensor("op_13_promoted_to_fp16_40"), val = tensor(0x1p+1)]; tensor var_3545_cast = pow(x = hidden_states_601_cast, y = var_13_promoted_to_fp16_40)[name = tensor("op_3545_cast")]; tensor var_3546 = const()[name = tensor("op_3546"), val = tensor([-1])]; tensor variance_81_cast = reduce_mean(axes = var_3546, keep_dims = var_23, x = var_3545_cast)[name = tensor("variance_81_cast")]; tensor var_3548_to_fp16 = const()[name = tensor("op_3548_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_3549_cast = add(x = variance_81_cast, y = var_3548_to_fp16)[name = tensor("op_3549_cast")]; tensor var_3550_epsilon_0_to_fp16 = const()[name = tensor("op_3550_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_3550_cast = rsqrt(epsilon = var_3550_epsilon_0_to_fp16, x = var_3549_cast)[name = tensor("op_3550_cast")]; tensor hidden_states_605_cast = mul(x = hidden_states_601_cast, y = var_3550_cast)[name = tensor("hidden_states_605_cast")]; tensor model_model_layers_20_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(473391232)))]; tensor hidden_states_609_cast = mul(x = model_model_layers_20_input_layernorm_weight_to_fp16, y = hidden_states_605_cast)[name = tensor("hidden_states_609_cast")]; tensor model_model_layers_20_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(473395392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(475492608))), name = tensor("model_model_layers_20_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_140_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_609_cast)[name = tensor("linear_140_cast")]; tensor model_model_layers_20_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(475492736))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(475754944))), name = tensor("model_model_layers_20_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_141_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_20_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_609_cast)[name = tensor("linear_141_cast")]; tensor model_model_layers_20_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(475755072))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(476017280))), name = tensor("model_model_layers_20_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_142_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_20_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_609_cast)[name = tensor("linear_142_cast")]; tensor var_3570 = const()[name = tensor("op_3570"), val = tensor([1, 128, 32, 64])]; tensor var_3571_cast = reshape(shape = var_3570, x = linear_140_cast)[name = tensor("op_3571_cast")]; tensor q_41_perm_0 = const()[name = tensor("q_41_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3573 = const()[name = tensor("op_3573"), val = tensor([1, 128, 4, 64])]; tensor var_3574_cast = reshape(shape = var_3573, x = linear_141_cast)[name = tensor("op_3574_cast")]; tensor key_states_123_perm_0 = const()[name = tensor("key_states_123_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3576 = const()[name = tensor("op_3576"), val = tensor([1, 128, 4, 64])]; tensor var_3577_cast = reshape(shape = var_3576, x = linear_142_cast)[name = tensor("op_3577_cast")]; tensor hidden_states_615_perm_0 = const()[name = tensor("hidden_states_615_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_9 = transpose(perm = q_41_perm_0, x = var_3571_cast)[name = tensor("transpose_9")]; tensor var_3603_cast = mul(x = transpose_9, y = cos_1_to_fp16_palettized)[name = tensor("op_3603_cast")]; tensor x1_81_begin_0 = const()[name = tensor("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_81_end_0 = const()[name = tensor("x1_81_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_81_end_mask_0 = const()[name = tensor("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_81_cast = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = transpose_9)[name = tensor("x1_81_cast")]; tensor x2_81_begin_0 = const()[name = tensor("x2_81_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_81_end_0 = const()[name = tensor("x2_81_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_81_end_mask_0 = const()[name = tensor("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_81_cast = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = transpose_9)[name = tensor("x2_81_cast")]; tensor const_351_promoted_to_fp16 = const()[name = tensor("const_351_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_3614_cast = mul(x = x2_81_cast, y = const_351_promoted_to_fp16)[name = tensor("op_3614_cast")]; tensor var_3616_interleave_0 = const()[name = tensor("op_3616_interleave_0"), val = tensor(false)]; tensor var_3616_cast = concat(axis = var_10, interleave = var_3616_interleave_0, values = (var_3614_cast, x1_81_cast))[name = tensor("op_3616_cast")]; tensor var_3617_cast = mul(x = var_3616_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_3617_cast")]; tensor query_states_83_cast = add(x = var_3603_cast, y = var_3617_cast)[name = tensor("query_states_83_cast")]; tensor transpose_8 = transpose(perm = key_states_123_perm_0, x = var_3574_cast)[name = tensor("transpose_8")]; tensor var_3619_cast = mul(x = transpose_8, y = cos_1_to_fp16_palettized)[name = tensor("op_3619_cast")]; tensor x1_83_begin_0 = const()[name = tensor("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_83_end_0 = const()[name = tensor("x1_83_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_83_end_mask_0 = const()[name = tensor("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_83_cast = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = transpose_8)[name = tensor("x1_83_cast")]; tensor x2_83_begin_0 = const()[name = tensor("x2_83_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_83_end_0 = const()[name = tensor("x2_83_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_83_end_mask_0 = const()[name = tensor("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_83_cast = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = transpose_8)[name = tensor("x2_83_cast")]; tensor const_354_promoted_to_fp16 = const()[name = tensor("const_354_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_3630_cast = mul(x = x2_83_cast, y = const_354_promoted_to_fp16)[name = tensor("op_3630_cast")]; tensor var_3632_interleave_0 = const()[name = tensor("op_3632_interleave_0"), val = tensor(false)]; tensor var_3632_cast = concat(axis = var_10, interleave = var_3632_interleave_0, values = (var_3630_cast, x1_83_cast))[name = tensor("op_3632_cast")]; tensor var_3633_cast = mul(x = var_3632_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_3633_cast")]; tensor hidden_states_611_cast = add(x = var_3619_cast, y = var_3633_cast)[name = tensor("hidden_states_611_cast")]; tensor var_3642_axes_0 = const()[name = tensor("op_3642_axes_0"), val = tensor([2])]; tensor var_3642_cast = expand_dims(axes = var_3642_axes_0, x = hidden_states_611_cast)[name = tensor("op_3642_cast")]; tensor hidden_states_613_reps_0 = const()[name = tensor("hidden_states_613_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_613_cast = tile(reps = hidden_states_613_reps_0, x = var_3642_cast)[name = tensor("hidden_states_613_cast")]; tensor var_3650 = const()[name = tensor("op_3650"), val = tensor([1, 32, 128, 64])]; tensor key_states_125_cast = reshape(shape = var_3650, x = hidden_states_613_cast)[name = tensor("key_states_125_cast")]; tensor var_3659_axes_0 = const()[name = tensor("op_3659_axes_0"), val = tensor([2])]; tensor transpose_7 = transpose(perm = hidden_states_615_perm_0, x = var_3577_cast)[name = tensor("transpose_7")]; tensor var_3659_cast = expand_dims(axes = var_3659_axes_0, x = transpose_7)[name = tensor("op_3659_cast")]; tensor hidden_states_617_reps_0 = const()[name = tensor("hidden_states_617_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_617_cast = tile(reps = hidden_states_617_reps_0, x = var_3659_cast)[name = tensor("hidden_states_617_cast")]; tensor var_3667 = const()[name = tensor("op_3667"), val = tensor([1, 32, 128, 64])]; tensor value_states_83_cast = reshape(shape = var_3667, x = hidden_states_617_cast)[name = tensor("value_states_83_cast")]; tensor var_3669_perm_0 = const()[name = tensor("op_3669_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3670_transpose_x_0 = const()[name = tensor("op_3670_transpose_x_0"), val = tensor(false)]; tensor var_3670_transpose_y_0 = const()[name = tensor("op_3670_transpose_y_0"), val = tensor(false)]; tensor transpose_6 = transpose(perm = var_3669_perm_0, x = key_states_125_cast)[name = tensor("transpose_6")]; tensor var_3670_cast = matmul(transpose_x = var_3670_transpose_x_0, transpose_y = var_3670_transpose_y_0, x = query_states_83_cast, y = transpose_6)[name = tensor("op_3670_cast")]; tensor _inversed_attn_weights_81_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_81_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_81_cast = mul(x = var_3670_cast, y = _inversed_attn_weights_81_y_0_to_fp16)[name = tensor("_inversed_attn_weights_81_cast")]; tensor input_201_cast = add(x = _inversed_attn_weights_81_cast, y = attention_mask_cast)[name = tensor("input_201_cast")]; tensor var_3674_cast = softmax(axis = var_10, x = input_201_cast)[name = tensor("op_3674_cast")]; tensor attn_output_81_transpose_x_0 = const()[name = tensor("attn_output_81_transpose_x_0"), val = tensor(false)]; tensor attn_output_81_transpose_y_0 = const()[name = tensor("attn_output_81_transpose_y_0"), val = tensor(false)]; tensor attn_output_81_cast = matmul(transpose_x = attn_output_81_transpose_x_0, transpose_y = attn_output_81_transpose_y_0, x = var_3674_cast, y = value_states_83_cast)[name = tensor("attn_output_81_cast")]; tensor var_3677_perm_0 = const()[name = tensor("op_3677_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3679 = const()[name = tensor("op_3679"), val = tensor([1, 128, 2048])]; tensor transpose_5 = transpose(perm = var_3677_perm_0, x = attn_output_81_cast)[name = tensor("transpose_5")]; tensor input_203_cast = reshape(shape = var_3679, x = transpose_5)[name = tensor("input_203_cast")]; tensor model_model_layers_20_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(476017408))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(478114624))), name = tensor("model_model_layers_20_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_143_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_self_attn_o_proj_weight_to_fp16_palettized, x = input_203_cast)[name = tensor("linear_143_cast")]; tensor hidden_states_621_cast = add(x = hidden_states_601_cast, y = linear_143_cast)[name = tensor("hidden_states_621_cast")]; tensor var_13_promoted_to_fp16_41 = const()[name = tensor("op_13_promoted_to_fp16_41"), val = tensor(0x1p+1)]; tensor var_3686_cast = pow(x = hidden_states_621_cast, y = var_13_promoted_to_fp16_41)[name = tensor("op_3686_cast")]; tensor var_3687 = const()[name = tensor("op_3687"), val = tensor([-1])]; tensor variance_83_cast = reduce_mean(axes = var_3687, keep_dims = var_23, x = var_3686_cast)[name = tensor("variance_83_cast")]; tensor var_3689_to_fp16 = const()[name = tensor("op_3689_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_3690_cast = add(x = variance_83_cast, y = var_3689_to_fp16)[name = tensor("op_3690_cast")]; tensor var_3691_epsilon_0_to_fp16 = const()[name = tensor("op_3691_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_3691_cast = rsqrt(epsilon = var_3691_epsilon_0_to_fp16, x = var_3690_cast)[name = tensor("op_3691_cast")]; tensor hidden_states_625_cast = mul(x = hidden_states_621_cast, y = var_3691_cast)[name = tensor("hidden_states_625_cast")]; tensor model_model_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(478114752)))]; tensor input_205_cast = mul(x = model_model_layers_20_post_attention_layernorm_weight_to_fp16, y = hidden_states_625_cast)[name = tensor("input_205_cast")]; tensor model_model_layers_20_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(478118912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483886144))), name = tensor("model_model_layers_20_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_144_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_20_mlp_gate_proj_weight_to_fp16_palettized, x = input_205_cast)[name = tensor("linear_144_cast")]; tensor var_3703_cast = silu(x = linear_144_cast)[name = tensor("op_3703_cast")]; tensor model_model_layers_20_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(483886272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(489653504))), name = tensor("model_model_layers_20_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_145_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_20_mlp_up_proj_weight_to_fp16_palettized, x = input_205_cast)[name = tensor("linear_145_cast")]; tensor input_209_cast = mul(x = var_3703_cast, y = linear_145_cast)[name = tensor("input_209_cast")]; tensor model_model_layers_20_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(489653632))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(495420864))), name = tensor("model_model_layers_20_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_146_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_mlp_down_proj_weight_to_fp16_palettized, x = input_209_cast)[name = tensor("linear_146_cast")]; tensor hidden_states_631_cast = add(x = hidden_states_621_cast, y = linear_146_cast)[name = tensor("hidden_states_631_cast")]; tensor var_13_promoted_to_fp16_42 = const()[name = tensor("op_13_promoted_to_fp16_42"), val = tensor(0x1p+1)]; tensor var_3716_cast = pow(x = hidden_states_631_cast, y = var_13_promoted_to_fp16_42)[name = tensor("op_3716_cast")]; tensor var_3717 = const()[name = tensor("op_3717"), val = tensor([-1])]; tensor variance_85_cast = reduce_mean(axes = var_3717, keep_dims = var_23, x = var_3716_cast)[name = tensor("variance_85_cast")]; tensor var_3719_to_fp16 = const()[name = tensor("op_3719_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_3720_cast = add(x = variance_85_cast, y = var_3719_to_fp16)[name = tensor("op_3720_cast")]; tensor var_3721_epsilon_0_to_fp16 = const()[name = tensor("op_3721_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_3721_cast = rsqrt(epsilon = var_3721_epsilon_0_to_fp16, x = var_3720_cast)[name = tensor("op_3721_cast")]; tensor hidden_states_635_cast = mul(x = hidden_states_631_cast, y = var_3721_cast)[name = tensor("hidden_states_635_cast")]; tensor model_model_layers_21_input_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(495420992)))]; tensor hidden_states_639_cast = mul(x = model_model_layers_21_input_layernorm_weight_to_fp16, y = hidden_states_635_cast)[name = tensor("hidden_states_639_cast")]; tensor model_model_layers_21_self_attn_q_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(495425152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(497522368))), name = tensor("model_model_layers_21_self_attn_q_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_147_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_self_attn_q_proj_weight_to_fp16_palettized, x = hidden_states_639_cast)[name = tensor("linear_147_cast")]; tensor model_model_layers_21_self_attn_k_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(497522496))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(497784704))), name = tensor("model_model_layers_21_self_attn_k_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_148_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_21_self_attn_k_proj_weight_to_fp16_palettized, x = hidden_states_639_cast)[name = tensor("linear_148_cast")]; tensor model_model_layers_21_self_attn_v_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(497784832))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(498047040))), name = tensor("model_model_layers_21_self_attn_v_proj_weight_to_fp16_palettized"), shape = tensor([256, 2048])]; tensor linear_149_cast = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_21_self_attn_v_proj_weight_to_fp16_palettized, x = hidden_states_639_cast)[name = tensor("linear_149_cast")]; tensor var_3741 = const()[name = tensor("op_3741"), val = tensor([1, 128, 32, 64])]; tensor var_3742_cast = reshape(shape = var_3741, x = linear_147_cast)[name = tensor("op_3742_cast")]; tensor q_perm_0 = const()[name = tensor("q_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3744 = const()[name = tensor("op_3744"), val = tensor([1, 128, 4, 64])]; tensor var_3745_cast = reshape(shape = var_3744, x = linear_148_cast)[name = tensor("op_3745_cast")]; tensor key_states_129_perm_0 = const()[name = tensor("key_states_129_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3747 = const()[name = tensor("op_3747"), val = tensor([1, 128, 4, 64])]; tensor var_3748_cast = reshape(shape = var_3747, x = linear_149_cast)[name = tensor("op_3748_cast")]; tensor hidden_states_645_perm_0 = const()[name = tensor("hidden_states_645_perm_0"), val = tensor([0, 2, 1, 3])]; tensor transpose_4 = transpose(perm = q_perm_0, x = var_3742_cast)[name = tensor("transpose_4")]; tensor var_3774_cast = mul(x = transpose_4, y = cos_1_to_fp16_palettized)[name = tensor("op_3774_cast")]; tensor x1_85_begin_0 = const()[name = tensor("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_85_end_0 = const()[name = tensor("x1_85_end_0"), val = tensor([1, 32, 128, 32])]; tensor x1_85_end_mask_0 = const()[name = tensor("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_85_cast = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = transpose_4)[name = tensor("x1_85_cast")]; tensor x2_85_begin_0 = const()[name = tensor("x2_85_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_85_end_0 = const()[name = tensor("x2_85_end_0"), val = tensor([1, 32, 128, 64])]; tensor x2_85_end_mask_0 = const()[name = tensor("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_85_cast = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = transpose_4)[name = tensor("x2_85_cast")]; tensor const_368_promoted_to_fp16 = const()[name = tensor("const_368_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_3785_cast = mul(x = x2_85_cast, y = const_368_promoted_to_fp16)[name = tensor("op_3785_cast")]; tensor var_3787_interleave_0 = const()[name = tensor("op_3787_interleave_0"), val = tensor(false)]; tensor var_3787_cast = concat(axis = var_10, interleave = var_3787_interleave_0, values = (var_3785_cast, x1_85_cast))[name = tensor("op_3787_cast")]; tensor var_3788_cast = mul(x = var_3787_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_3788_cast")]; tensor query_states_cast = add(x = var_3774_cast, y = var_3788_cast)[name = tensor("query_states_cast")]; tensor transpose_3 = transpose(perm = key_states_129_perm_0, x = var_3745_cast)[name = tensor("transpose_3")]; tensor var_3790_cast = mul(x = transpose_3, y = cos_1_to_fp16_palettized)[name = tensor("op_3790_cast")]; tensor x1_begin_0 = const()[name = tensor("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = tensor("x1_end_0"), val = tensor([1, 4, 128, 32])]; tensor x1_end_mask_0 = const()[name = tensor("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_cast = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = transpose_3)[name = tensor("x1_cast")]; tensor x2_begin_0 = const()[name = tensor("x2_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_end_0 = const()[name = tensor("x2_end_0"), val = tensor([1, 4, 128, 64])]; tensor x2_end_mask_0 = const()[name = tensor("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_cast = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = transpose_3)[name = tensor("x2_cast")]; tensor const_371_promoted_to_fp16 = const()[name = tensor("const_371_promoted_to_fp16"), val = tensor(-0x1p+0)]; tensor var_3801_cast = mul(x = x2_cast, y = const_371_promoted_to_fp16)[name = tensor("op_3801_cast")]; tensor var_3803_interleave_0 = const()[name = tensor("op_3803_interleave_0"), val = tensor(false)]; tensor var_3803_cast = concat(axis = var_10, interleave = var_3803_interleave_0, values = (var_3801_cast, x1_cast))[name = tensor("op_3803_cast")]; tensor var_3804_cast = mul(x = var_3803_cast, y = sin_1_to_fp16_palettized)[name = tensor("op_3804_cast")]; tensor hidden_states_641_cast = add(x = var_3790_cast, y = var_3804_cast)[name = tensor("hidden_states_641_cast")]; tensor var_3813_axes_0 = const()[name = tensor("op_3813_axes_0"), val = tensor([2])]; tensor var_3813_cast = expand_dims(axes = var_3813_axes_0, x = hidden_states_641_cast)[name = tensor("op_3813_cast")]; tensor hidden_states_643_reps_0 = const()[name = tensor("hidden_states_643_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_643_cast = tile(reps = hidden_states_643_reps_0, x = var_3813_cast)[name = tensor("hidden_states_643_cast")]; tensor var_3821 = const()[name = tensor("op_3821"), val = tensor([1, 32, 128, 64])]; tensor key_states_cast = reshape(shape = var_3821, x = hidden_states_643_cast)[name = tensor("key_states_cast")]; tensor var_3830_axes_0 = const()[name = tensor("op_3830_axes_0"), val = tensor([2])]; tensor transpose_2 = transpose(perm = hidden_states_645_perm_0, x = var_3748_cast)[name = tensor("transpose_2")]; tensor var_3830_cast = expand_dims(axes = var_3830_axes_0, x = transpose_2)[name = tensor("op_3830_cast")]; tensor hidden_states_647_reps_0 = const()[name = tensor("hidden_states_647_reps_0"), val = tensor([1, 1, 8, 1, 1])]; tensor hidden_states_647_cast = tile(reps = hidden_states_647_reps_0, x = var_3830_cast)[name = tensor("hidden_states_647_cast")]; tensor var_3838 = const()[name = tensor("op_3838"), val = tensor([1, 32, 128, 64])]; tensor value_states_cast = reshape(shape = var_3838, x = hidden_states_647_cast)[name = tensor("value_states_cast")]; tensor var_3840_perm_0 = const()[name = tensor("op_3840_perm_0"), val = tensor([0, 1, 3, 2])]; tensor var_3841_transpose_x_0 = const()[name = tensor("op_3841_transpose_x_0"), val = tensor(false)]; tensor var_3841_transpose_y_0 = const()[name = tensor("op_3841_transpose_y_0"), val = tensor(false)]; tensor transpose_1 = transpose(perm = var_3840_perm_0, x = key_states_cast)[name = tensor("transpose_1")]; tensor var_3841_cast = matmul(transpose_x = var_3841_transpose_x_0, transpose_y = var_3841_transpose_y_0, x = query_states_cast, y = transpose_1)[name = tensor("op_3841_cast")]; tensor _inversed_attn_weights_85_y_0_to_fp16 = const()[name = tensor("_inversed_attn_weights_85_y_0_to_fp16"), val = tensor(0x1p-3)]; tensor _inversed_attn_weights_85_cast = mul(x = var_3841_cast, y = _inversed_attn_weights_85_y_0_to_fp16)[name = tensor("_inversed_attn_weights_85_cast")]; tensor input_211_cast = add(x = _inversed_attn_weights_85_cast, y = attention_mask_cast)[name = tensor("input_211_cast")]; tensor var_3845_cast = softmax(axis = var_10, x = input_211_cast)[name = tensor("op_3845_cast")]; tensor attn_output_85_transpose_x_0 = const()[name = tensor("attn_output_85_transpose_x_0"), val = tensor(false)]; tensor attn_output_85_transpose_y_0 = const()[name = tensor("attn_output_85_transpose_y_0"), val = tensor(false)]; tensor attn_output_85_cast = matmul(transpose_x = attn_output_85_transpose_x_0, transpose_y = attn_output_85_transpose_y_0, x = var_3845_cast, y = value_states_cast)[name = tensor("attn_output_85_cast")]; tensor var_3848_perm_0 = const()[name = tensor("op_3848_perm_0"), val = tensor([0, 2, 1, 3])]; tensor var_3850 = const()[name = tensor("op_3850"), val = tensor([1, 128, 2048])]; tensor transpose_0 = transpose(perm = var_3848_perm_0, x = attn_output_85_cast)[name = tensor("transpose_0")]; tensor input_213_cast = reshape(shape = var_3850, x = transpose_0)[name = tensor("input_213_cast")]; tensor model_model_layers_21_self_attn_o_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(498047168))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(500144384))), name = tensor("model_model_layers_21_self_attn_o_proj_weight_to_fp16_palettized"), shape = tensor([2048, 2048])]; tensor linear_150_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_self_attn_o_proj_weight_to_fp16_palettized, x = input_213_cast)[name = tensor("linear_150_cast")]; tensor hidden_states_651_cast = add(x = hidden_states_631_cast, y = linear_150_cast)[name = tensor("hidden_states_651_cast")]; tensor var_13_promoted_to_fp16_43 = const()[name = tensor("op_13_promoted_to_fp16_43"), val = tensor(0x1p+1)]; tensor var_3857_cast = pow(x = hidden_states_651_cast, y = var_13_promoted_to_fp16_43)[name = tensor("op_3857_cast")]; tensor var_3858 = const()[name = tensor("op_3858"), val = tensor([-1])]; tensor variance_87_cast = reduce_mean(axes = var_3858, keep_dims = var_23, x = var_3857_cast)[name = tensor("variance_87_cast")]; tensor var_3860_to_fp16 = const()[name = tensor("op_3860_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_3861_cast = add(x = variance_87_cast, y = var_3860_to_fp16)[name = tensor("op_3861_cast")]; tensor var_3862_epsilon_0_to_fp16 = const()[name = tensor("op_3862_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_3862_cast = rsqrt(epsilon = var_3862_epsilon_0_to_fp16, x = var_3861_cast)[name = tensor("op_3862_cast")]; tensor hidden_states_655_cast = mul(x = hidden_states_651_cast, y = var_3862_cast)[name = tensor("hidden_states_655_cast")]; tensor model_model_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = tensor("model_model_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(500144512)))]; tensor input_215_cast = mul(x = model_model_layers_21_post_attention_layernorm_weight_to_fp16, y = hidden_states_655_cast)[name = tensor("input_215_cast")]; tensor model_model_layers_21_mlp_gate_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(500148672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505915904))), name = tensor("model_model_layers_21_mlp_gate_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_151_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_21_mlp_gate_proj_weight_to_fp16_palettized, x = input_215_cast)[name = tensor("linear_151_cast")]; tensor var_3874_cast = silu(x = linear_151_cast)[name = tensor("op_3874_cast")]; tensor model_model_layers_21_mlp_up_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(505916032))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511683264))), name = tensor("model_model_layers_21_mlp_up_proj_weight_to_fp16_palettized"), shape = tensor([5632, 2048])]; tensor linear_152_cast = linear(bias = linear_4_bias_0_to_fp16_palettized, weight = model_model_layers_21_mlp_up_proj_weight_to_fp16_palettized, x = input_215_cast)[name = tensor("linear_152_cast")]; tensor input_219_cast = mul(x = var_3874_cast, y = linear_152_cast)[name = tensor("input_219_cast")]; tensor model_model_layers_21_mlp_down_proj_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(511683392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517450624))), name = tensor("model_model_layers_21_mlp_down_proj_weight_to_fp16_palettized"), shape = tensor([2048, 5632])]; tensor linear_153_cast = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_mlp_down_proj_weight_to_fp16_palettized, x = input_219_cast)[name = tensor("linear_153_cast")]; tensor hidden_states_661_cast = add(x = hidden_states_651_cast, y = linear_153_cast)[name = tensor("hidden_states_661_cast")]; tensor var_13_promoted_to_fp16_44 = const()[name = tensor("op_13_promoted_to_fp16_44"), val = tensor(0x1p+1)]; tensor var_3883_cast = pow(x = hidden_states_661_cast, y = var_13_promoted_to_fp16_44)[name = tensor("op_3883_cast")]; tensor var_3884 = const()[name = tensor("op_3884"), val = tensor([-1])]; tensor variance_cast = reduce_mean(axes = var_3884, keep_dims = var_23, x = var_3883_cast)[name = tensor("variance_cast")]; tensor var_3886_to_fp16 = const()[name = tensor("op_3886_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_3887_cast = add(x = variance_cast, y = var_3886_to_fp16)[name = tensor("op_3887_cast")]; tensor var_3888_epsilon_0_to_fp16 = const()[name = tensor("op_3888_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; tensor var_3888_cast = rsqrt(epsilon = var_3888_epsilon_0_to_fp16, x = var_3887_cast)[name = tensor("op_3888_cast")]; tensor hidden_states_665_cast = mul(x = hidden_states_661_cast, y = var_3888_cast)[name = tensor("hidden_states_665_cast")]; tensor model_model_norm_weight_to_fp16 = const()[name = tensor("model_model_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517450752)))]; tensor input_cast = mul(x = model_model_norm_weight_to_fp16, y = hidden_states_665_cast)[name = tensor("input_cast")]; tensor model_lm_head_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(517454912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550226048))), name = tensor("model_lm_head_weight_to_fp16_palettized"), shape = tensor([32003, 2048])]; tensor linear_154_bias_0_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550226176))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(550242304))), name = tensor("linear_154_bias_0_to_fp16_palettized"), shape = tensor([32003])]; tensor linear_154_cast = linear(bias = linear_154_bias_0_to_fp16_palettized, weight = model_lm_head_weight_to_fp16_palettized, x = input_cast)[name = tensor("linear_154_cast")]; tensor linear_154_cast_to_fp32_dtype_0 = const()[name = tensor("linear_154_cast_to_fp32_dtype_0"), val = tensor("fp32")]; tensor logits = cast(dtype = linear_154_cast_to_fp32_dtype_0, x = linear_154_cast)[name = tensor("cast_0")]; } -> (logits); }