{ "config": {}, "structure": { "conv_in": { "weight": "conv_in.weight", "bias": "conv_in.bias", "weight_quant": { "scale": "conv_in.weight_scale", "zero_point": "conv_in.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "conv_in.input_scale", "zero_point": "conv_in.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "conv_in.output_scale", "zero_point": "conv_in.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_proj": { "type": "Timesteps" }, "time_embedding": { "linear_1": { "weight": "time_embedding.linear_1.weight", "bias": "time_embedding.linear_1.bias", "weight_quant": { "scale": "time_embedding.linear_1.weight_scale", "zero_point": "time_embedding.linear_1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "time_embedding.linear_1.input_scale", "zero_point": "time_embedding.linear_1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "time_embedding.linear_1.output_scale", "zero_point": "time_embedding.linear_1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "act": { "type": "SiLU" }, "linear_2": { "weight": "time_embedding.linear_2.weight", "bias": "time_embedding.linear_2.bias", "weight_quant": { "scale": "time_embedding.linear_2.weight_scale", "zero_point": "time_embedding.linear_2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "time_embedding.linear_2.input_scale", "zero_point": "time_embedding.linear_2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "time_embedding.linear_2.output_scale", "zero_point": "time_embedding.linear_2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "add_time_proj": { "type": "Timesteps" }, "add_embedding": { "linear_1": { "weight": "add_embedding.linear_1.weight", "bias": "add_embedding.linear_1.bias", "weight_quant": { "scale": "add_embedding.linear_1.weight_scale", "zero_point": "add_embedding.linear_1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "add_embedding.linear_1.input_scale", "zero_point": "add_embedding.linear_1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "add_embedding.linear_1.output_scale", "zero_point": "add_embedding.linear_1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "act": { "type": "SiLU" }, "linear_2": { "weight": "add_embedding.linear_2.weight", "bias": "add_embedding.linear_2.bias", "weight_quant": { "scale": "add_embedding.linear_2.weight_scale", "zero_point": "add_embedding.linear_2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "add_embedding.linear_2.input_scale", "zero_point": "add_embedding.linear_2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "add_embedding.linear_2.output_scale", "zero_point": "add_embedding.linear_2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "down_blocks": { "0": { "resnets": { "0": { "norm1": { "weight": "down_blocks.0.resnets.0.norm1.weight", "bias": "down_blocks.0.resnets.0.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "down_blocks.0.resnets.0.conv1.weight", "bias": "down_blocks.0.resnets.0.conv1.bias", "weight_quant": { "scale": "down_blocks.0.resnets.0.conv1.weight_scale", "zero_point": "down_blocks.0.resnets.0.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.0.resnets.0.conv1.input_scale", "zero_point": "down_blocks.0.resnets.0.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.0.resnets.0.conv1.output_scale", "zero_point": "down_blocks.0.resnets.0.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "down_blocks.0.resnets.0.time_emb_proj.weight", "bias": "down_blocks.0.resnets.0.time_emb_proj.bias", "weight_quant": { "scale": "down_blocks.0.resnets.0.time_emb_proj.weight_scale", "zero_point": "down_blocks.0.resnets.0.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.0.resnets.0.time_emb_proj.input_scale", "zero_point": "down_blocks.0.resnets.0.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.0.resnets.0.time_emb_proj.output_scale", "zero_point": "down_blocks.0.resnets.0.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "down_blocks.0.resnets.0.norm2.weight", "bias": "down_blocks.0.resnets.0.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "down_blocks.0.resnets.0.conv2.weight", "bias": "down_blocks.0.resnets.0.conv2.bias", "weight_quant": { "scale": "down_blocks.0.resnets.0.conv2.weight_scale", "zero_point": "down_blocks.0.resnets.0.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.0.resnets.0.conv2.input_scale", "zero_point": "down_blocks.0.resnets.0.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.0.resnets.0.conv2.output_scale", "zero_point": "down_blocks.0.resnets.0.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" } }, "1": { "norm1": { "weight": "down_blocks.0.resnets.1.norm1.weight", "bias": "down_blocks.0.resnets.1.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "down_blocks.0.resnets.1.conv1.weight", "bias": "down_blocks.0.resnets.1.conv1.bias", "weight_quant": { "scale": "down_blocks.0.resnets.1.conv1.weight_scale", "zero_point": "down_blocks.0.resnets.1.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.0.resnets.1.conv1.input_scale", "zero_point": "down_blocks.0.resnets.1.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.0.resnets.1.conv1.output_scale", "zero_point": "down_blocks.0.resnets.1.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "down_blocks.0.resnets.1.time_emb_proj.weight", "bias": "down_blocks.0.resnets.1.time_emb_proj.bias", "weight_quant": { "scale": "down_blocks.0.resnets.1.time_emb_proj.weight_scale", "zero_point": "down_blocks.0.resnets.1.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.0.resnets.1.time_emb_proj.input_scale", "zero_point": "down_blocks.0.resnets.1.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.0.resnets.1.time_emb_proj.output_scale", "zero_point": "down_blocks.0.resnets.1.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "down_blocks.0.resnets.1.norm2.weight", "bias": "down_blocks.0.resnets.1.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "down_blocks.0.resnets.1.conv2.weight", "bias": "down_blocks.0.resnets.1.conv2.bias", "weight_quant": { "scale": "down_blocks.0.resnets.1.conv2.weight_scale", "zero_point": "down_blocks.0.resnets.1.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.0.resnets.1.conv2.input_scale", "zero_point": "down_blocks.0.resnets.1.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.0.resnets.1.conv2.output_scale", "zero_point": "down_blocks.0.resnets.1.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" } } }, "downsamplers": { "0": { "conv": { "weight": "down_blocks.0.downsamplers.0.conv.weight", "bias": "down_blocks.0.downsamplers.0.conv.bias", "weight_quant": { "scale": "down_blocks.0.downsamplers.0.conv.weight_scale", "zero_point": "down_blocks.0.downsamplers.0.conv.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.0.downsamplers.0.conv.input_scale", "zero_point": "down_blocks.0.downsamplers.0.conv.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.0.downsamplers.0.conv.output_scale", "zero_point": "down_blocks.0.downsamplers.0.conv.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" } } } }, "1": { "attentions": { "0": { "norm": { "weight": "down_blocks.1.attentions.0.norm.weight", "bias": "down_blocks.1.attentions.0.norm.bias", "type": "GroupNorm" }, "proj_in": { "weight": "down_blocks.1.attentions.0.proj_in.weight", "bias": "down_blocks.1.attentions.0.proj_in.bias", "weight_quant": { "scale": "down_blocks.1.attentions.0.proj_in.weight_scale", "zero_point": "down_blocks.1.attentions.0.proj_in.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.proj_in.input_scale", "zero_point": "down_blocks.1.attentions.0.proj_in.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.proj_in.output_scale", "zero_point": "down_blocks.1.attentions.0.proj_in.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "transformer_blocks": { "0": { "norm1": { "weight": "down_blocks.1.attentions.0.transformer_blocks.0.norm1.weight", "bias": "down_blocks.1.attentions.0.transformer_blocks.0.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", "bias": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.1.attentions.0.transformer_blocks.0.norm2.weight", "bias": "down_blocks.1.attentions.0.transformer_blocks.0.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", "bias": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.1.attentions.0.transformer_blocks.0.norm3.weight", "bias": "down_blocks.1.attentions.0.transformer_blocks.0.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", "bias": "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight", "bias": "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "1": { "norm1": { "weight": "down_blocks.1.attentions.0.transformer_blocks.1.norm1.weight", "bias": "down_blocks.1.attentions.0.transformer_blocks.1.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.weight", "bias": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.1.attentions.0.transformer_blocks.1.norm2.weight", "bias": "down_blocks.1.attentions.0.transformer_blocks.1.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.weight", "bias": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.1.attentions.0.transformer_blocks.1.norm3.weight", "bias": "down_blocks.1.attentions.0.transformer_blocks.1.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.weight", "bias": "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.weight", "bias": "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.weight_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.input_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.output_scale", "zero_point": "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } } }, "proj_out": { "weight": "down_blocks.1.attentions.0.proj_out.weight", "bias": "down_blocks.1.attentions.0.proj_out.bias", "weight_quant": { "scale": "down_blocks.1.attentions.0.proj_out.weight_scale", "zero_point": "down_blocks.1.attentions.0.proj_out.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.0.proj_out.input_scale", "zero_point": "down_blocks.1.attentions.0.proj_out.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.0.proj_out.output_scale", "zero_point": "down_blocks.1.attentions.0.proj_out.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "norm": { "weight": "down_blocks.1.attentions.1.norm.weight", "bias": "down_blocks.1.attentions.1.norm.bias", "type": "GroupNorm" }, "proj_in": { "weight": "down_blocks.1.attentions.1.proj_in.weight", "bias": "down_blocks.1.attentions.1.proj_in.bias", "weight_quant": { "scale": "down_blocks.1.attentions.1.proj_in.weight_scale", "zero_point": "down_blocks.1.attentions.1.proj_in.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.proj_in.input_scale", "zero_point": "down_blocks.1.attentions.1.proj_in.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.proj_in.output_scale", "zero_point": "down_blocks.1.attentions.1.proj_in.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "transformer_blocks": { "0": { "norm1": { "weight": "down_blocks.1.attentions.1.transformer_blocks.0.norm1.weight", "bias": "down_blocks.1.attentions.1.transformer_blocks.0.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", "bias": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.1.attentions.1.transformer_blocks.0.norm2.weight", "bias": "down_blocks.1.attentions.1.transformer_blocks.0.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", "bias": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.1.attentions.1.transformer_blocks.0.norm3.weight", "bias": "down_blocks.1.attentions.1.transformer_blocks.0.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", "bias": "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight", "bias": "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "1": { "norm1": { "weight": "down_blocks.1.attentions.1.transformer_blocks.1.norm1.weight", "bias": "down_blocks.1.attentions.1.transformer_blocks.1.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.weight", "bias": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.1.attentions.1.transformer_blocks.1.norm2.weight", "bias": "down_blocks.1.attentions.1.transformer_blocks.1.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.weight", "bias": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.1.attentions.1.transformer_blocks.1.norm3.weight", "bias": "down_blocks.1.attentions.1.transformer_blocks.1.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.weight", "bias": "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.weight", "bias": "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.weight_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.input_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.output_scale", "zero_point": "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } } }, "proj_out": { "weight": "down_blocks.1.attentions.1.proj_out.weight", "bias": "down_blocks.1.attentions.1.proj_out.bias", "weight_quant": { "scale": "down_blocks.1.attentions.1.proj_out.weight_scale", "zero_point": "down_blocks.1.attentions.1.proj_out.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.attentions.1.proj_out.input_scale", "zero_point": "down_blocks.1.attentions.1.proj_out.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.attentions.1.proj_out.output_scale", "zero_point": "down_blocks.1.attentions.1.proj_out.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } }, "resnets": { "0": { "norm1": { "weight": "down_blocks.1.resnets.0.norm1.weight", "bias": "down_blocks.1.resnets.0.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "down_blocks.1.resnets.0.conv1.weight", "bias": "down_blocks.1.resnets.0.conv1.bias", "weight_quant": { "scale": "down_blocks.1.resnets.0.conv1.weight_scale", "zero_point": "down_blocks.1.resnets.0.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.resnets.0.conv1.input_scale", "zero_point": "down_blocks.1.resnets.0.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.resnets.0.conv1.output_scale", "zero_point": "down_blocks.1.resnets.0.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "down_blocks.1.resnets.0.time_emb_proj.weight", "bias": "down_blocks.1.resnets.0.time_emb_proj.bias", "weight_quant": { "scale": "down_blocks.1.resnets.0.time_emb_proj.weight_scale", "zero_point": "down_blocks.1.resnets.0.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.resnets.0.time_emb_proj.input_scale", "zero_point": "down_blocks.1.resnets.0.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.resnets.0.time_emb_proj.output_scale", "zero_point": "down_blocks.1.resnets.0.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "down_blocks.1.resnets.0.norm2.weight", "bias": "down_blocks.1.resnets.0.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "down_blocks.1.resnets.0.conv2.weight", "bias": "down_blocks.1.resnets.0.conv2.bias", "weight_quant": { "scale": "down_blocks.1.resnets.0.conv2.weight_scale", "zero_point": "down_blocks.1.resnets.0.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.resnets.0.conv2.input_scale", "zero_point": "down_blocks.1.resnets.0.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.resnets.0.conv2.output_scale", "zero_point": "down_blocks.1.resnets.0.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" }, "conv_shortcut": { "weight": "down_blocks.1.resnets.0.conv_shortcut.weight", "bias": "down_blocks.1.resnets.0.conv_shortcut.bias", "weight_quant": { "scale": "down_blocks.1.resnets.0.conv_shortcut.weight_scale", "zero_point": "down_blocks.1.resnets.0.conv_shortcut.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.resnets.0.conv_shortcut.input_scale", "zero_point": "down_blocks.1.resnets.0.conv_shortcut.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.resnets.0.conv_shortcut.output_scale", "zero_point": "down_blocks.1.resnets.0.conv_shortcut.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" } }, "1": { "norm1": { "weight": "down_blocks.1.resnets.1.norm1.weight", "bias": "down_blocks.1.resnets.1.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "down_blocks.1.resnets.1.conv1.weight", "bias": "down_blocks.1.resnets.1.conv1.bias", "weight_quant": { "scale": "down_blocks.1.resnets.1.conv1.weight_scale", "zero_point": "down_blocks.1.resnets.1.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.resnets.1.conv1.input_scale", "zero_point": "down_blocks.1.resnets.1.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.resnets.1.conv1.output_scale", "zero_point": "down_blocks.1.resnets.1.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "down_blocks.1.resnets.1.time_emb_proj.weight", "bias": "down_blocks.1.resnets.1.time_emb_proj.bias", "weight_quant": { "scale": "down_blocks.1.resnets.1.time_emb_proj.weight_scale", "zero_point": "down_blocks.1.resnets.1.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.resnets.1.time_emb_proj.input_scale", "zero_point": "down_blocks.1.resnets.1.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.resnets.1.time_emb_proj.output_scale", "zero_point": "down_blocks.1.resnets.1.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "down_blocks.1.resnets.1.norm2.weight", "bias": "down_blocks.1.resnets.1.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "down_blocks.1.resnets.1.conv2.weight", "bias": "down_blocks.1.resnets.1.conv2.bias", "weight_quant": { "scale": "down_blocks.1.resnets.1.conv2.weight_scale", "zero_point": "down_blocks.1.resnets.1.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.resnets.1.conv2.input_scale", "zero_point": "down_blocks.1.resnets.1.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.resnets.1.conv2.output_scale", "zero_point": "down_blocks.1.resnets.1.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" } } }, "downsamplers": { "0": { "conv": { "weight": "down_blocks.1.downsamplers.0.conv.weight", "bias": "down_blocks.1.downsamplers.0.conv.bias", "weight_quant": { "scale": "down_blocks.1.downsamplers.0.conv.weight_scale", "zero_point": "down_blocks.1.downsamplers.0.conv.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.1.downsamplers.0.conv.input_scale", "zero_point": "down_blocks.1.downsamplers.0.conv.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.1.downsamplers.0.conv.output_scale", "zero_point": "down_blocks.1.downsamplers.0.conv.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" } } } }, "2": { "attentions": { "0": { "norm": { "weight": "down_blocks.2.attentions.0.norm.weight", "bias": "down_blocks.2.attentions.0.norm.bias", "type": "GroupNorm" }, "proj_in": { "weight": "down_blocks.2.attentions.0.proj_in.weight", "bias": "down_blocks.2.attentions.0.proj_in.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.proj_in.weight_scale", "zero_point": "down_blocks.2.attentions.0.proj_in.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.proj_in.input_scale", "zero_point": "down_blocks.2.attentions.0.proj_in.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.proj_in.output_scale", "zero_point": "down_blocks.2.attentions.0.proj_in.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "transformer_blocks": { "0": { "norm1": { "weight": "down_blocks.2.attentions.0.transformer_blocks.0.norm1.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.0.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.0.norm2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.0.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.0.transformer_blocks.0.norm3.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.0.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "1": { "norm1": { "weight": "down_blocks.2.attentions.0.transformer_blocks.1.norm1.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.1.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.1.norm2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.1.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.0.transformer_blocks.1.norm3.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.1.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "2": { "norm1": { "weight": "down_blocks.2.attentions.0.transformer_blocks.2.norm1.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.2.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.2.norm2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.2.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.0.transformer_blocks.2.norm3.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.2.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "3": { "norm1": { "weight": "down_blocks.2.attentions.0.transformer_blocks.3.norm1.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.3.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.3.norm2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.3.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.0.transformer_blocks.3.norm3.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.3.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "4": { "norm1": { "weight": "down_blocks.2.attentions.0.transformer_blocks.4.norm1.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.4.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.4.norm2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.4.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.0.transformer_blocks.4.norm3.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.4.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "5": { "norm1": { "weight": "down_blocks.2.attentions.0.transformer_blocks.5.norm1.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.5.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.5.norm2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.5.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.0.transformer_blocks.5.norm3.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.5.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "6": { "norm1": { "weight": "down_blocks.2.attentions.0.transformer_blocks.6.norm1.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.6.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.6.norm2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.6.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.0.transformer_blocks.6.norm3.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.6.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "7": { "norm1": { "weight": "down_blocks.2.attentions.0.transformer_blocks.7.norm1.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.7.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.7.norm2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.7.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.0.transformer_blocks.7.norm3.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.7.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "8": { "norm1": { "weight": "down_blocks.2.attentions.0.transformer_blocks.8.norm1.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.8.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.8.norm2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.8.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.0.transformer_blocks.8.norm3.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.8.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "9": { "norm1": { "weight": "down_blocks.2.attentions.0.transformer_blocks.9.norm1.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.9.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.9.norm2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.9.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.0.transformer_blocks.9.norm3.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.9.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.2.weight", "bias": "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } } }, "proj_out": { "weight": "down_blocks.2.attentions.0.proj_out.weight", "bias": "down_blocks.2.attentions.0.proj_out.bias", "weight_quant": { "scale": "down_blocks.2.attentions.0.proj_out.weight_scale", "zero_point": "down_blocks.2.attentions.0.proj_out.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.0.proj_out.input_scale", "zero_point": "down_blocks.2.attentions.0.proj_out.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.0.proj_out.output_scale", "zero_point": "down_blocks.2.attentions.0.proj_out.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "norm": { "weight": "down_blocks.2.attentions.1.norm.weight", "bias": "down_blocks.2.attentions.1.norm.bias", "type": "GroupNorm" }, "proj_in": { "weight": "down_blocks.2.attentions.1.proj_in.weight", "bias": "down_blocks.2.attentions.1.proj_in.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.proj_in.weight_scale", "zero_point": "down_blocks.2.attentions.1.proj_in.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.proj_in.input_scale", "zero_point": "down_blocks.2.attentions.1.proj_in.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.proj_in.output_scale", "zero_point": "down_blocks.2.attentions.1.proj_in.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "transformer_blocks": { "0": { "norm1": { "weight": "down_blocks.2.attentions.1.transformer_blocks.0.norm1.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.0.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.0.norm2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.0.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.1.transformer_blocks.0.norm3.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.0.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "1": { "norm1": { "weight": "down_blocks.2.attentions.1.transformer_blocks.1.norm1.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.1.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.1.norm2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.1.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.1.transformer_blocks.1.norm3.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.1.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "2": { "norm1": { "weight": "down_blocks.2.attentions.1.transformer_blocks.2.norm1.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.2.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.2.norm2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.2.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.1.transformer_blocks.2.norm3.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.2.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "3": { "norm1": { "weight": "down_blocks.2.attentions.1.transformer_blocks.3.norm1.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.3.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.3.norm2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.3.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.1.transformer_blocks.3.norm3.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.3.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "4": { "norm1": { "weight": "down_blocks.2.attentions.1.transformer_blocks.4.norm1.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.4.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.4.norm2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.4.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.1.transformer_blocks.4.norm3.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.4.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "5": { "norm1": { "weight": "down_blocks.2.attentions.1.transformer_blocks.5.norm1.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.5.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.5.norm2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.5.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.1.transformer_blocks.5.norm3.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.5.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "6": { "norm1": { "weight": "down_blocks.2.attentions.1.transformer_blocks.6.norm1.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.6.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.6.norm2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.6.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.1.transformer_blocks.6.norm3.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.6.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "7": { "norm1": { "weight": "down_blocks.2.attentions.1.transformer_blocks.7.norm1.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.7.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.7.norm2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.7.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.1.transformer_blocks.7.norm3.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.7.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "8": { "norm1": { "weight": "down_blocks.2.attentions.1.transformer_blocks.8.norm1.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.8.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.8.norm2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.8.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.1.transformer_blocks.8.norm3.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.8.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "9": { "norm1": { "weight": "down_blocks.2.attentions.1.transformer_blocks.9.norm1.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.9.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.9.norm2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.9.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_q.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_q.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_q.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_q.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_k.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_k.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_k.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_k.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_v.weight", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_v.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_v.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_v.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_out.0.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_out.0.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_out.0.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_out.0.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_out.0.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "down_blocks.2.attentions.1.transformer_blocks.9.norm3.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.9.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.0.proj.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.0.proj.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.0.proj.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.0.proj.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.0.proj.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.2.weight", "bias": "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.2.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.2.weight_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.2.input_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.2.output_scale", "zero_point": "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } } }, "proj_out": { "weight": "down_blocks.2.attentions.1.proj_out.weight", "bias": "down_blocks.2.attentions.1.proj_out.bias", "weight_quant": { "scale": "down_blocks.2.attentions.1.proj_out.weight_scale", "zero_point": "down_blocks.2.attentions.1.proj_out.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.attentions.1.proj_out.input_scale", "zero_point": "down_blocks.2.attentions.1.proj_out.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.attentions.1.proj_out.output_scale", "zero_point": "down_blocks.2.attentions.1.proj_out.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } }, "resnets": { "0": { "norm1": { "weight": "down_blocks.2.resnets.0.norm1.weight", "bias": "down_blocks.2.resnets.0.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "down_blocks.2.resnets.0.conv1.weight", "bias": "down_blocks.2.resnets.0.conv1.bias", "weight_quant": { "scale": "down_blocks.2.resnets.0.conv1.weight_scale", "zero_point": "down_blocks.2.resnets.0.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.resnets.0.conv1.input_scale", "zero_point": "down_blocks.2.resnets.0.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.resnets.0.conv1.output_scale", "zero_point": "down_blocks.2.resnets.0.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "down_blocks.2.resnets.0.time_emb_proj.weight", "bias": "down_blocks.2.resnets.0.time_emb_proj.bias", "weight_quant": { "scale": "down_blocks.2.resnets.0.time_emb_proj.weight_scale", "zero_point": "down_blocks.2.resnets.0.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.resnets.0.time_emb_proj.input_scale", "zero_point": "down_blocks.2.resnets.0.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.resnets.0.time_emb_proj.output_scale", "zero_point": "down_blocks.2.resnets.0.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "down_blocks.2.resnets.0.norm2.weight", "bias": "down_blocks.2.resnets.0.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "down_blocks.2.resnets.0.conv2.weight", "bias": "down_blocks.2.resnets.0.conv2.bias", "weight_quant": { "scale": "down_blocks.2.resnets.0.conv2.weight_scale", "zero_point": "down_blocks.2.resnets.0.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.resnets.0.conv2.input_scale", "zero_point": "down_blocks.2.resnets.0.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.resnets.0.conv2.output_scale", "zero_point": "down_blocks.2.resnets.0.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" }, "conv_shortcut": { "weight": "down_blocks.2.resnets.0.conv_shortcut.weight", "bias": "down_blocks.2.resnets.0.conv_shortcut.bias", "weight_quant": { "scale": "down_blocks.2.resnets.0.conv_shortcut.weight_scale", "zero_point": "down_blocks.2.resnets.0.conv_shortcut.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.resnets.0.conv_shortcut.input_scale", "zero_point": "down_blocks.2.resnets.0.conv_shortcut.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.resnets.0.conv_shortcut.output_scale", "zero_point": "down_blocks.2.resnets.0.conv_shortcut.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" } }, "1": { "norm1": { "weight": "down_blocks.2.resnets.1.norm1.weight", "bias": "down_blocks.2.resnets.1.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "down_blocks.2.resnets.1.conv1.weight", "bias": "down_blocks.2.resnets.1.conv1.bias", "weight_quant": { "scale": "down_blocks.2.resnets.1.conv1.weight_scale", "zero_point": "down_blocks.2.resnets.1.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.resnets.1.conv1.input_scale", "zero_point": "down_blocks.2.resnets.1.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.resnets.1.conv1.output_scale", "zero_point": "down_blocks.2.resnets.1.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "down_blocks.2.resnets.1.time_emb_proj.weight", "bias": "down_blocks.2.resnets.1.time_emb_proj.bias", "weight_quant": { "scale": "down_blocks.2.resnets.1.time_emb_proj.weight_scale", "zero_point": "down_blocks.2.resnets.1.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.resnets.1.time_emb_proj.input_scale", "zero_point": "down_blocks.2.resnets.1.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.resnets.1.time_emb_proj.output_scale", "zero_point": "down_blocks.2.resnets.1.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "down_blocks.2.resnets.1.norm2.weight", "bias": "down_blocks.2.resnets.1.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "down_blocks.2.resnets.1.conv2.weight", "bias": "down_blocks.2.resnets.1.conv2.bias", "weight_quant": { "scale": "down_blocks.2.resnets.1.conv2.weight_scale", "zero_point": "down_blocks.2.resnets.1.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "down_blocks.2.resnets.1.conv2.input_scale", "zero_point": "down_blocks.2.resnets.1.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "down_blocks.2.resnets.1.conv2.output_scale", "zero_point": "down_blocks.2.resnets.1.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" } } } } }, "up_blocks": { "0": { "attentions": { "0": { "norm": { "weight": "up_blocks.0.attentions.0.norm.weight", "bias": "up_blocks.0.attentions.0.norm.bias", "type": "GroupNorm" }, "proj_in": { "weight": "up_blocks.0.attentions.0.proj_in.weight", "bias": "up_blocks.0.attentions.0.proj_in.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.proj_in.weight_scale", "zero_point": "up_blocks.0.attentions.0.proj_in.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.proj_in.input_scale", "zero_point": "up_blocks.0.attentions.0.proj_in.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.proj_in.output_scale", "zero_point": "up_blocks.0.attentions.0.proj_in.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "transformer_blocks": { "0": { "norm1": { "weight": "up_blocks.0.attentions.0.transformer_blocks.0.norm1.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.0.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.0.norm2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.0.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.0.transformer_blocks.0.norm3.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.0.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "1": { "norm1": { "weight": "up_blocks.0.attentions.0.transformer_blocks.1.norm1.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.1.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.1.norm2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.1.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.0.transformer_blocks.1.norm3.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.1.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "2": { "norm1": { "weight": "up_blocks.0.attentions.0.transformer_blocks.2.norm1.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.2.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.2.norm2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.2.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.0.transformer_blocks.2.norm3.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.2.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "3": { "norm1": { "weight": "up_blocks.0.attentions.0.transformer_blocks.3.norm1.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.3.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.3.norm2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.3.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.0.transformer_blocks.3.norm3.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.3.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "4": { "norm1": { "weight": "up_blocks.0.attentions.0.transformer_blocks.4.norm1.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.4.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.4.norm2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.4.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.0.transformer_blocks.4.norm3.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.4.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "5": { "norm1": { "weight": "up_blocks.0.attentions.0.transformer_blocks.5.norm1.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.5.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.5.norm2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.5.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.0.transformer_blocks.5.norm3.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.5.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "6": { "norm1": { "weight": "up_blocks.0.attentions.0.transformer_blocks.6.norm1.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.6.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.6.norm2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.6.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.0.transformer_blocks.6.norm3.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.6.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "7": { "norm1": { "weight": "up_blocks.0.attentions.0.transformer_blocks.7.norm1.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.7.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.7.norm2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.7.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.0.transformer_blocks.7.norm3.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.7.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "8": { "norm1": { "weight": "up_blocks.0.attentions.0.transformer_blocks.8.norm1.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.8.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.8.norm2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.8.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.0.transformer_blocks.8.norm3.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.8.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "9": { "norm1": { "weight": "up_blocks.0.attentions.0.transformer_blocks.9.norm1.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.9.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.9.norm2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.9.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.0.transformer_blocks.9.norm3.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.9.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.2.weight", "bias": "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } } }, "proj_out": { "weight": "up_blocks.0.attentions.0.proj_out.weight", "bias": "up_blocks.0.attentions.0.proj_out.bias", "weight_quant": { "scale": "up_blocks.0.attentions.0.proj_out.weight_scale", "zero_point": "up_blocks.0.attentions.0.proj_out.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.0.proj_out.input_scale", "zero_point": "up_blocks.0.attentions.0.proj_out.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.0.proj_out.output_scale", "zero_point": "up_blocks.0.attentions.0.proj_out.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "norm": { "weight": "up_blocks.0.attentions.1.norm.weight", "bias": "up_blocks.0.attentions.1.norm.bias", "type": "GroupNorm" }, "proj_in": { "weight": "up_blocks.0.attentions.1.proj_in.weight", "bias": "up_blocks.0.attentions.1.proj_in.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.proj_in.weight_scale", "zero_point": "up_blocks.0.attentions.1.proj_in.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.proj_in.input_scale", "zero_point": "up_blocks.0.attentions.1.proj_in.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.proj_in.output_scale", "zero_point": "up_blocks.0.attentions.1.proj_in.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "transformer_blocks": { "0": { "norm1": { "weight": "up_blocks.0.attentions.1.transformer_blocks.0.norm1.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.0.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.0.norm2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.0.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.1.transformer_blocks.0.norm3.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.0.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "1": { "norm1": { "weight": "up_blocks.0.attentions.1.transformer_blocks.1.norm1.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.1.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.1.norm2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.1.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.1.transformer_blocks.1.norm3.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.1.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "2": { "norm1": { "weight": "up_blocks.0.attentions.1.transformer_blocks.2.norm1.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.2.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.2.norm2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.2.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.1.transformer_blocks.2.norm3.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.2.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "3": { "norm1": { "weight": "up_blocks.0.attentions.1.transformer_blocks.3.norm1.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.3.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.3.norm2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.3.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.1.transformer_blocks.3.norm3.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.3.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "4": { "norm1": { "weight": "up_blocks.0.attentions.1.transformer_blocks.4.norm1.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.4.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.4.norm2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.4.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.1.transformer_blocks.4.norm3.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.4.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "5": { "norm1": { "weight": "up_blocks.0.attentions.1.transformer_blocks.5.norm1.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.5.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.5.norm2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.5.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.1.transformer_blocks.5.norm3.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.5.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "6": { "norm1": { "weight": "up_blocks.0.attentions.1.transformer_blocks.6.norm1.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.6.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.6.norm2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.6.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.1.transformer_blocks.6.norm3.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.6.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "7": { "norm1": { "weight": "up_blocks.0.attentions.1.transformer_blocks.7.norm1.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.7.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.7.norm2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.7.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.1.transformer_blocks.7.norm3.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.7.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "8": { "norm1": { "weight": "up_blocks.0.attentions.1.transformer_blocks.8.norm1.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.8.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.8.norm2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.8.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.1.transformer_blocks.8.norm3.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.8.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "9": { "norm1": { "weight": "up_blocks.0.attentions.1.transformer_blocks.9.norm1.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.9.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.9.norm2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.9.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.1.transformer_blocks.9.norm3.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.9.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.2.weight", "bias": "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } } }, "proj_out": { "weight": "up_blocks.0.attentions.1.proj_out.weight", "bias": "up_blocks.0.attentions.1.proj_out.bias", "weight_quant": { "scale": "up_blocks.0.attentions.1.proj_out.weight_scale", "zero_point": "up_blocks.0.attentions.1.proj_out.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.1.proj_out.input_scale", "zero_point": "up_blocks.0.attentions.1.proj_out.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.1.proj_out.output_scale", "zero_point": "up_blocks.0.attentions.1.proj_out.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "2": { "norm": { "weight": "up_blocks.0.attentions.2.norm.weight", "bias": "up_blocks.0.attentions.2.norm.bias", "type": "GroupNorm" }, "proj_in": { "weight": "up_blocks.0.attentions.2.proj_in.weight", "bias": "up_blocks.0.attentions.2.proj_in.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.proj_in.weight_scale", "zero_point": "up_blocks.0.attentions.2.proj_in.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.proj_in.input_scale", "zero_point": "up_blocks.0.attentions.2.proj_in.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.proj_in.output_scale", "zero_point": "up_blocks.0.attentions.2.proj_in.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "transformer_blocks": { "0": { "norm1": { "weight": "up_blocks.0.attentions.2.transformer_blocks.0.norm1.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.0.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.0.norm2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.0.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.2.transformer_blocks.0.norm3.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.0.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "1": { "norm1": { "weight": "up_blocks.0.attentions.2.transformer_blocks.1.norm1.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.1.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.1.norm2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.1.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.2.transformer_blocks.1.norm3.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.1.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "2": { "norm1": { "weight": "up_blocks.0.attentions.2.transformer_blocks.2.norm1.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.2.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.2.norm2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.2.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.2.transformer_blocks.2.norm3.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.2.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "3": { "norm1": { "weight": "up_blocks.0.attentions.2.transformer_blocks.3.norm1.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.3.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.3.norm2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.3.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.2.transformer_blocks.3.norm3.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.3.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "4": { "norm1": { "weight": "up_blocks.0.attentions.2.transformer_blocks.4.norm1.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.4.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.4.norm2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.4.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.2.transformer_blocks.4.norm3.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.4.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "5": { "norm1": { "weight": "up_blocks.0.attentions.2.transformer_blocks.5.norm1.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.5.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.5.norm2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.5.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.2.transformer_blocks.5.norm3.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.5.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "6": { "norm1": { "weight": "up_blocks.0.attentions.2.transformer_blocks.6.norm1.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.6.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.6.norm2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.6.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.2.transformer_blocks.6.norm3.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.6.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "7": { "norm1": { "weight": "up_blocks.0.attentions.2.transformer_blocks.7.norm1.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.7.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.7.norm2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.7.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.2.transformer_blocks.7.norm3.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.7.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "8": { "norm1": { "weight": "up_blocks.0.attentions.2.transformer_blocks.8.norm1.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.8.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.8.norm2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.8.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.2.transformer_blocks.8.norm3.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.8.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "9": { "norm1": { "weight": "up_blocks.0.attentions.2.transformer_blocks.9.norm1.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.9.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.9.norm2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.9.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_q.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_q.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_q.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_k.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_k.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_k.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_v.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_v.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_v.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.0.attentions.2.transformer_blocks.9.norm3.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.9.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.2.weight", "bias": "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.2.weight_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.2.input_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.2.output_scale", "zero_point": "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } } }, "proj_out": { "weight": "up_blocks.0.attentions.2.proj_out.weight", "bias": "up_blocks.0.attentions.2.proj_out.bias", "weight_quant": { "scale": "up_blocks.0.attentions.2.proj_out.weight_scale", "zero_point": "up_blocks.0.attentions.2.proj_out.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.attentions.2.proj_out.input_scale", "zero_point": "up_blocks.0.attentions.2.proj_out.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.attentions.2.proj_out.output_scale", "zero_point": "up_blocks.0.attentions.2.proj_out.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } }, "resnets": { "0": { "norm1": { "weight": "up_blocks.0.resnets.0.norm1.weight", "bias": "up_blocks.0.resnets.0.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "up_blocks.0.resnets.0.conv1.weight", "bias": "up_blocks.0.resnets.0.conv1.bias", "weight_quant": { "scale": "up_blocks.0.resnets.0.conv1.weight_scale", "zero_point": "up_blocks.0.resnets.0.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.resnets.0.conv1.input_scale", "zero_point": "up_blocks.0.resnets.0.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.resnets.0.conv1.output_scale", "zero_point": "up_blocks.0.resnets.0.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "up_blocks.0.resnets.0.time_emb_proj.weight", "bias": "up_blocks.0.resnets.0.time_emb_proj.bias", "weight_quant": { "scale": "up_blocks.0.resnets.0.time_emb_proj.weight_scale", "zero_point": "up_blocks.0.resnets.0.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.resnets.0.time_emb_proj.input_scale", "zero_point": "up_blocks.0.resnets.0.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.resnets.0.time_emb_proj.output_scale", "zero_point": "up_blocks.0.resnets.0.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "up_blocks.0.resnets.0.norm2.weight", "bias": "up_blocks.0.resnets.0.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "up_blocks.0.resnets.0.conv2.weight", "bias": "up_blocks.0.resnets.0.conv2.bias", "weight_quant": { "scale": "up_blocks.0.resnets.0.conv2.weight_scale", "zero_point": "up_blocks.0.resnets.0.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.resnets.0.conv2.input_scale", "zero_point": "up_blocks.0.resnets.0.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.resnets.0.conv2.output_scale", "zero_point": "up_blocks.0.resnets.0.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" }, "conv_shortcut": { "weight": "up_blocks.0.resnets.0.conv_shortcut.weight", "bias": "up_blocks.0.resnets.0.conv_shortcut.bias", "weight_quant": { "scale": "up_blocks.0.resnets.0.conv_shortcut.weight_scale", "zero_point": "up_blocks.0.resnets.0.conv_shortcut.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.resnets.0.conv_shortcut.input_scale", "zero_point": "up_blocks.0.resnets.0.conv_shortcut.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.resnets.0.conv_shortcut.output_scale", "zero_point": "up_blocks.0.resnets.0.conv_shortcut.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" } }, "1": { "norm1": { "weight": "up_blocks.0.resnets.1.norm1.weight", "bias": "up_blocks.0.resnets.1.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "up_blocks.0.resnets.1.conv1.weight", "bias": "up_blocks.0.resnets.1.conv1.bias", "weight_quant": { "scale": "up_blocks.0.resnets.1.conv1.weight_scale", "zero_point": "up_blocks.0.resnets.1.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.resnets.1.conv1.input_scale", "zero_point": "up_blocks.0.resnets.1.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.resnets.1.conv1.output_scale", "zero_point": "up_blocks.0.resnets.1.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "up_blocks.0.resnets.1.time_emb_proj.weight", "bias": "up_blocks.0.resnets.1.time_emb_proj.bias", "weight_quant": { "scale": "up_blocks.0.resnets.1.time_emb_proj.weight_scale", "zero_point": "up_blocks.0.resnets.1.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.resnets.1.time_emb_proj.input_scale", "zero_point": "up_blocks.0.resnets.1.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.resnets.1.time_emb_proj.output_scale", "zero_point": "up_blocks.0.resnets.1.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "up_blocks.0.resnets.1.norm2.weight", "bias": "up_blocks.0.resnets.1.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "up_blocks.0.resnets.1.conv2.weight", "bias": "up_blocks.0.resnets.1.conv2.bias", "weight_quant": { "scale": "up_blocks.0.resnets.1.conv2.weight_scale", "zero_point": "up_blocks.0.resnets.1.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.resnets.1.conv2.input_scale", "zero_point": "up_blocks.0.resnets.1.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.resnets.1.conv2.output_scale", "zero_point": "up_blocks.0.resnets.1.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" }, "conv_shortcut": { "weight": "up_blocks.0.resnets.1.conv_shortcut.weight", "bias": "up_blocks.0.resnets.1.conv_shortcut.bias", "weight_quant": { "scale": "up_blocks.0.resnets.1.conv_shortcut.weight_scale", "zero_point": "up_blocks.0.resnets.1.conv_shortcut.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.resnets.1.conv_shortcut.input_scale", "zero_point": "up_blocks.0.resnets.1.conv_shortcut.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.resnets.1.conv_shortcut.output_scale", "zero_point": "up_blocks.0.resnets.1.conv_shortcut.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" } }, "2": { "norm1": { "weight": "up_blocks.0.resnets.2.norm1.weight", "bias": "up_blocks.0.resnets.2.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "up_blocks.0.resnets.2.conv1.weight", "bias": "up_blocks.0.resnets.2.conv1.bias", "weight_quant": { "scale": "up_blocks.0.resnets.2.conv1.weight_scale", "zero_point": "up_blocks.0.resnets.2.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.resnets.2.conv1.input_scale", "zero_point": "up_blocks.0.resnets.2.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.resnets.2.conv1.output_scale", "zero_point": "up_blocks.0.resnets.2.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "up_blocks.0.resnets.2.time_emb_proj.weight", "bias": "up_blocks.0.resnets.2.time_emb_proj.bias", "weight_quant": { "scale": "up_blocks.0.resnets.2.time_emb_proj.weight_scale", "zero_point": "up_blocks.0.resnets.2.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.resnets.2.time_emb_proj.input_scale", "zero_point": "up_blocks.0.resnets.2.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.resnets.2.time_emb_proj.output_scale", "zero_point": "up_blocks.0.resnets.2.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "up_blocks.0.resnets.2.norm2.weight", "bias": "up_blocks.0.resnets.2.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "up_blocks.0.resnets.2.conv2.weight", "bias": "up_blocks.0.resnets.2.conv2.bias", "weight_quant": { "scale": "up_blocks.0.resnets.2.conv2.weight_scale", "zero_point": "up_blocks.0.resnets.2.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.resnets.2.conv2.input_scale", "zero_point": "up_blocks.0.resnets.2.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.resnets.2.conv2.output_scale", "zero_point": "up_blocks.0.resnets.2.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" }, "conv_shortcut": { "weight": "up_blocks.0.resnets.2.conv_shortcut.weight", "bias": "up_blocks.0.resnets.2.conv_shortcut.bias", "weight_quant": { "scale": "up_blocks.0.resnets.2.conv_shortcut.weight_scale", "zero_point": "up_blocks.0.resnets.2.conv_shortcut.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.resnets.2.conv_shortcut.input_scale", "zero_point": "up_blocks.0.resnets.2.conv_shortcut.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.resnets.2.conv_shortcut.output_scale", "zero_point": "up_blocks.0.resnets.2.conv_shortcut.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" } } }, "upsamplers": { "0": { "conv": { "weight": "up_blocks.0.upsamplers.0.conv.weight", "bias": "up_blocks.0.upsamplers.0.conv.bias", "weight_quant": { "scale": "up_blocks.0.upsamplers.0.conv.weight_scale", "zero_point": "up_blocks.0.upsamplers.0.conv.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.0.upsamplers.0.conv.input_scale", "zero_point": "up_blocks.0.upsamplers.0.conv.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.0.upsamplers.0.conv.output_scale", "zero_point": "up_blocks.0.upsamplers.0.conv.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" } } } }, "1": { "attentions": { "0": { "norm": { "weight": "up_blocks.1.attentions.0.norm.weight", "bias": "up_blocks.1.attentions.0.norm.bias", "type": "GroupNorm" }, "proj_in": { "weight": "up_blocks.1.attentions.0.proj_in.weight", "bias": "up_blocks.1.attentions.0.proj_in.bias", "weight_quant": { "scale": "up_blocks.1.attentions.0.proj_in.weight_scale", "zero_point": "up_blocks.1.attentions.0.proj_in.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.proj_in.input_scale", "zero_point": "up_blocks.1.attentions.0.proj_in.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.proj_in.output_scale", "zero_point": "up_blocks.1.attentions.0.proj_in.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "transformer_blocks": { "0": { "norm1": { "weight": "up_blocks.1.attentions.0.transformer_blocks.0.norm1.weight", "bias": "up_blocks.1.attentions.0.transformer_blocks.0.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", "bias": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.1.attentions.0.transformer_blocks.0.norm2.weight", "bias": "up_blocks.1.attentions.0.transformer_blocks.0.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", "bias": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.1.attentions.0.transformer_blocks.0.norm3.weight", "bias": "up_blocks.1.attentions.0.transformer_blocks.0.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", "bias": "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight", "bias": "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "1": { "norm1": { "weight": "up_blocks.1.attentions.0.transformer_blocks.1.norm1.weight", "bias": "up_blocks.1.attentions.0.transformer_blocks.1.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.weight", "bias": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.1.attentions.0.transformer_blocks.1.norm2.weight", "bias": "up_blocks.1.attentions.0.transformer_blocks.1.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.weight", "bias": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.1.attentions.0.transformer_blocks.1.norm3.weight", "bias": "up_blocks.1.attentions.0.transformer_blocks.1.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.weight", "bias": "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.weight", "bias": "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.weight_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.input_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.output_scale", "zero_point": "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } } }, "proj_out": { "weight": "up_blocks.1.attentions.0.proj_out.weight", "bias": "up_blocks.1.attentions.0.proj_out.bias", "weight_quant": { "scale": "up_blocks.1.attentions.0.proj_out.weight_scale", "zero_point": "up_blocks.1.attentions.0.proj_out.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.0.proj_out.input_scale", "zero_point": "up_blocks.1.attentions.0.proj_out.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.0.proj_out.output_scale", "zero_point": "up_blocks.1.attentions.0.proj_out.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "norm": { "weight": "up_blocks.1.attentions.1.norm.weight", "bias": "up_blocks.1.attentions.1.norm.bias", "type": "GroupNorm" }, "proj_in": { "weight": "up_blocks.1.attentions.1.proj_in.weight", "bias": "up_blocks.1.attentions.1.proj_in.bias", "weight_quant": { "scale": "up_blocks.1.attentions.1.proj_in.weight_scale", "zero_point": "up_blocks.1.attentions.1.proj_in.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.proj_in.input_scale", "zero_point": "up_blocks.1.attentions.1.proj_in.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.proj_in.output_scale", "zero_point": "up_blocks.1.attentions.1.proj_in.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "transformer_blocks": { "0": { "norm1": { "weight": "up_blocks.1.attentions.1.transformer_blocks.0.norm1.weight", "bias": "up_blocks.1.attentions.1.transformer_blocks.0.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight", "bias": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.1.attentions.1.transformer_blocks.0.norm2.weight", "bias": "up_blocks.1.attentions.1.transformer_blocks.0.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight", "bias": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.1.attentions.1.transformer_blocks.0.norm3.weight", "bias": "up_blocks.1.attentions.1.transformer_blocks.0.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight", "bias": "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight", "bias": "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "1": { "norm1": { "weight": "up_blocks.1.attentions.1.transformer_blocks.1.norm1.weight", "bias": "up_blocks.1.attentions.1.transformer_blocks.1.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.weight", "bias": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.1.attentions.1.transformer_blocks.1.norm2.weight", "bias": "up_blocks.1.attentions.1.transformer_blocks.1.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.weight", "bias": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.1.attentions.1.transformer_blocks.1.norm3.weight", "bias": "up_blocks.1.attentions.1.transformer_blocks.1.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.weight", "bias": "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.weight", "bias": "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.weight_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.input_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.output_scale", "zero_point": "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } } }, "proj_out": { "weight": "up_blocks.1.attentions.1.proj_out.weight", "bias": "up_blocks.1.attentions.1.proj_out.bias", "weight_quant": { "scale": "up_blocks.1.attentions.1.proj_out.weight_scale", "zero_point": "up_blocks.1.attentions.1.proj_out.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.1.proj_out.input_scale", "zero_point": "up_blocks.1.attentions.1.proj_out.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.1.proj_out.output_scale", "zero_point": "up_blocks.1.attentions.1.proj_out.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "2": { "norm": { "weight": "up_blocks.1.attentions.2.norm.weight", "bias": "up_blocks.1.attentions.2.norm.bias", "type": "GroupNorm" }, "proj_in": { "weight": "up_blocks.1.attentions.2.proj_in.weight", "bias": "up_blocks.1.attentions.2.proj_in.bias", "weight_quant": { "scale": "up_blocks.1.attentions.2.proj_in.weight_scale", "zero_point": "up_blocks.1.attentions.2.proj_in.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.proj_in.input_scale", "zero_point": "up_blocks.1.attentions.2.proj_in.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.proj_in.output_scale", "zero_point": "up_blocks.1.attentions.2.proj_in.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "transformer_blocks": { "0": { "norm1": { "weight": "up_blocks.1.attentions.2.transformer_blocks.0.norm1.weight", "bias": "up_blocks.1.attentions.2.transformer_blocks.0.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.weight", "bias": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.1.attentions.2.transformer_blocks.0.norm2.weight", "bias": "up_blocks.1.attentions.2.transformer_blocks.0.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.weight", "bias": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.1.attentions.2.transformer_blocks.0.norm3.weight", "bias": "up_blocks.1.attentions.2.transformer_blocks.0.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.weight", "bias": "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.weight", "bias": "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "1": { "norm1": { "weight": "up_blocks.1.attentions.2.transformer_blocks.1.norm1.weight", "bias": "up_blocks.1.attentions.2.transformer_blocks.1.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_q.weight", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_q.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_q.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_q.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_k.weight", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_k.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_k.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_k.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_v.weight", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_v.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_v.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_v.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_out.0.weight", "bias": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_out.0.bias", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_out.0.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_out.0.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_out.0.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "up_blocks.1.attentions.2.transformer_blocks.1.norm2.weight", "bias": "up_blocks.1.attentions.2.transformer_blocks.1.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_q.weight", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_q.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_q.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_q.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_k.weight", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_k.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_k.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_k.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_v.weight", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_v.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_v.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_v.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_out.0.weight", "bias": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_out.0.bias", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_out.0.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_out.0.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_out.0.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "up_blocks.1.attentions.2.transformer_blocks.1.norm3.weight", "bias": "up_blocks.1.attentions.2.transformer_blocks.1.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.0.proj.weight", "bias": "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.0.proj.bias", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.0.proj.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.0.proj.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.0.proj.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.2.weight", "bias": "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.2.bias", "weight_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.2.weight_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.2.input_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.2.output_scale", "zero_point": "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } } }, "proj_out": { "weight": "up_blocks.1.attentions.2.proj_out.weight", "bias": "up_blocks.1.attentions.2.proj_out.bias", "weight_quant": { "scale": "up_blocks.1.attentions.2.proj_out.weight_scale", "zero_point": "up_blocks.1.attentions.2.proj_out.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.attentions.2.proj_out.input_scale", "zero_point": "up_blocks.1.attentions.2.proj_out.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.attentions.2.proj_out.output_scale", "zero_point": "up_blocks.1.attentions.2.proj_out.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } }, "resnets": { "0": { "norm1": { "weight": "up_blocks.1.resnets.0.norm1.weight", "bias": "up_blocks.1.resnets.0.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "up_blocks.1.resnets.0.conv1.weight", "bias": "up_blocks.1.resnets.0.conv1.bias", "weight_quant": { "scale": "up_blocks.1.resnets.0.conv1.weight_scale", "zero_point": "up_blocks.1.resnets.0.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.resnets.0.conv1.input_scale", "zero_point": "up_blocks.1.resnets.0.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.resnets.0.conv1.output_scale", "zero_point": "up_blocks.1.resnets.0.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "up_blocks.1.resnets.0.time_emb_proj.weight", "bias": "up_blocks.1.resnets.0.time_emb_proj.bias", "weight_quant": { "scale": "up_blocks.1.resnets.0.time_emb_proj.weight_scale", "zero_point": "up_blocks.1.resnets.0.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.resnets.0.time_emb_proj.input_scale", "zero_point": "up_blocks.1.resnets.0.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.resnets.0.time_emb_proj.output_scale", "zero_point": "up_blocks.1.resnets.0.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "up_blocks.1.resnets.0.norm2.weight", "bias": "up_blocks.1.resnets.0.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "up_blocks.1.resnets.0.conv2.weight", "bias": "up_blocks.1.resnets.0.conv2.bias", "weight_quant": { "scale": "up_blocks.1.resnets.0.conv2.weight_scale", "zero_point": "up_blocks.1.resnets.0.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.resnets.0.conv2.input_scale", "zero_point": "up_blocks.1.resnets.0.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.resnets.0.conv2.output_scale", "zero_point": "up_blocks.1.resnets.0.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" }, "conv_shortcut": { "weight": "up_blocks.1.resnets.0.conv_shortcut.weight", "bias": "up_blocks.1.resnets.0.conv_shortcut.bias", "weight_quant": { "scale": "up_blocks.1.resnets.0.conv_shortcut.weight_scale", "zero_point": "up_blocks.1.resnets.0.conv_shortcut.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.resnets.0.conv_shortcut.input_scale", "zero_point": "up_blocks.1.resnets.0.conv_shortcut.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.resnets.0.conv_shortcut.output_scale", "zero_point": "up_blocks.1.resnets.0.conv_shortcut.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" } }, "1": { "norm1": { "weight": "up_blocks.1.resnets.1.norm1.weight", "bias": "up_blocks.1.resnets.1.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "up_blocks.1.resnets.1.conv1.weight", "bias": "up_blocks.1.resnets.1.conv1.bias", "weight_quant": { "scale": "up_blocks.1.resnets.1.conv1.weight_scale", "zero_point": "up_blocks.1.resnets.1.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.resnets.1.conv1.input_scale", "zero_point": "up_blocks.1.resnets.1.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.resnets.1.conv1.output_scale", "zero_point": "up_blocks.1.resnets.1.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "up_blocks.1.resnets.1.time_emb_proj.weight", "bias": "up_blocks.1.resnets.1.time_emb_proj.bias", "weight_quant": { "scale": "up_blocks.1.resnets.1.time_emb_proj.weight_scale", "zero_point": "up_blocks.1.resnets.1.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.resnets.1.time_emb_proj.input_scale", "zero_point": "up_blocks.1.resnets.1.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.resnets.1.time_emb_proj.output_scale", "zero_point": "up_blocks.1.resnets.1.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "up_blocks.1.resnets.1.norm2.weight", "bias": "up_blocks.1.resnets.1.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "up_blocks.1.resnets.1.conv2.weight", "bias": "up_blocks.1.resnets.1.conv2.bias", "weight_quant": { "scale": "up_blocks.1.resnets.1.conv2.weight_scale", "zero_point": "up_blocks.1.resnets.1.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.resnets.1.conv2.input_scale", "zero_point": "up_blocks.1.resnets.1.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.resnets.1.conv2.output_scale", "zero_point": "up_blocks.1.resnets.1.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" }, "conv_shortcut": { "weight": "up_blocks.1.resnets.1.conv_shortcut.weight", "bias": "up_blocks.1.resnets.1.conv_shortcut.bias", "weight_quant": { "scale": "up_blocks.1.resnets.1.conv_shortcut.weight_scale", "zero_point": "up_blocks.1.resnets.1.conv_shortcut.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.resnets.1.conv_shortcut.input_scale", "zero_point": "up_blocks.1.resnets.1.conv_shortcut.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.resnets.1.conv_shortcut.output_scale", "zero_point": "up_blocks.1.resnets.1.conv_shortcut.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" } }, "2": { "norm1": { "weight": "up_blocks.1.resnets.2.norm1.weight", "bias": "up_blocks.1.resnets.2.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "up_blocks.1.resnets.2.conv1.weight", "bias": "up_blocks.1.resnets.2.conv1.bias", "weight_quant": { "scale": "up_blocks.1.resnets.2.conv1.weight_scale", "zero_point": "up_blocks.1.resnets.2.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.resnets.2.conv1.input_scale", "zero_point": "up_blocks.1.resnets.2.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.resnets.2.conv1.output_scale", "zero_point": "up_blocks.1.resnets.2.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "up_blocks.1.resnets.2.time_emb_proj.weight", "bias": "up_blocks.1.resnets.2.time_emb_proj.bias", "weight_quant": { "scale": "up_blocks.1.resnets.2.time_emb_proj.weight_scale", "zero_point": "up_blocks.1.resnets.2.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.resnets.2.time_emb_proj.input_scale", "zero_point": "up_blocks.1.resnets.2.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.resnets.2.time_emb_proj.output_scale", "zero_point": "up_blocks.1.resnets.2.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "up_blocks.1.resnets.2.norm2.weight", "bias": "up_blocks.1.resnets.2.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "up_blocks.1.resnets.2.conv2.weight", "bias": "up_blocks.1.resnets.2.conv2.bias", "weight_quant": { "scale": "up_blocks.1.resnets.2.conv2.weight_scale", "zero_point": "up_blocks.1.resnets.2.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.resnets.2.conv2.input_scale", "zero_point": "up_blocks.1.resnets.2.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.resnets.2.conv2.output_scale", "zero_point": "up_blocks.1.resnets.2.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" }, "conv_shortcut": { "weight": "up_blocks.1.resnets.2.conv_shortcut.weight", "bias": "up_blocks.1.resnets.2.conv_shortcut.bias", "weight_quant": { "scale": "up_blocks.1.resnets.2.conv_shortcut.weight_scale", "zero_point": "up_blocks.1.resnets.2.conv_shortcut.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.resnets.2.conv_shortcut.input_scale", "zero_point": "up_blocks.1.resnets.2.conv_shortcut.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.resnets.2.conv_shortcut.output_scale", "zero_point": "up_blocks.1.resnets.2.conv_shortcut.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" } } }, "upsamplers": { "0": { "conv": { "weight": "up_blocks.1.upsamplers.0.conv.weight", "bias": "up_blocks.1.upsamplers.0.conv.bias", "weight_quant": { "scale": "up_blocks.1.upsamplers.0.conv.weight_scale", "zero_point": "up_blocks.1.upsamplers.0.conv.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.1.upsamplers.0.conv.input_scale", "zero_point": "up_blocks.1.upsamplers.0.conv.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.1.upsamplers.0.conv.output_scale", "zero_point": "up_blocks.1.upsamplers.0.conv.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" } } } }, "2": { "resnets": { "0": { "norm1": { "weight": "up_blocks.2.resnets.0.norm1.weight", "bias": "up_blocks.2.resnets.0.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "up_blocks.2.resnets.0.conv1.weight", "bias": "up_blocks.2.resnets.0.conv1.bias", "weight_quant": { "scale": "up_blocks.2.resnets.0.conv1.weight_scale", "zero_point": "up_blocks.2.resnets.0.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.2.resnets.0.conv1.input_scale", "zero_point": "up_blocks.2.resnets.0.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.2.resnets.0.conv1.output_scale", "zero_point": "up_blocks.2.resnets.0.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "up_blocks.2.resnets.0.time_emb_proj.weight", "bias": "up_blocks.2.resnets.0.time_emb_proj.bias", "weight_quant": { "scale": "up_blocks.2.resnets.0.time_emb_proj.weight_scale", "zero_point": "up_blocks.2.resnets.0.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.2.resnets.0.time_emb_proj.input_scale", "zero_point": "up_blocks.2.resnets.0.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.2.resnets.0.time_emb_proj.output_scale", "zero_point": "up_blocks.2.resnets.0.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "up_blocks.2.resnets.0.norm2.weight", "bias": "up_blocks.2.resnets.0.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "up_blocks.2.resnets.0.conv2.weight", "bias": "up_blocks.2.resnets.0.conv2.bias", "weight_quant": { "scale": "up_blocks.2.resnets.0.conv2.weight_scale", "zero_point": "up_blocks.2.resnets.0.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.2.resnets.0.conv2.input_scale", "zero_point": "up_blocks.2.resnets.0.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.2.resnets.0.conv2.output_scale", "zero_point": "up_blocks.2.resnets.0.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" }, "conv_shortcut": { "weight": "up_blocks.2.resnets.0.conv_shortcut.weight", "bias": "up_blocks.2.resnets.0.conv_shortcut.bias", "weight_quant": { "scale": "up_blocks.2.resnets.0.conv_shortcut.weight_scale", "zero_point": "up_blocks.2.resnets.0.conv_shortcut.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.2.resnets.0.conv_shortcut.input_scale", "zero_point": "up_blocks.2.resnets.0.conv_shortcut.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.2.resnets.0.conv_shortcut.output_scale", "zero_point": "up_blocks.2.resnets.0.conv_shortcut.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" } }, "1": { "norm1": { "weight": "up_blocks.2.resnets.1.norm1.weight", "bias": "up_blocks.2.resnets.1.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "up_blocks.2.resnets.1.conv1.weight", "bias": "up_blocks.2.resnets.1.conv1.bias", "weight_quant": { "scale": "up_blocks.2.resnets.1.conv1.weight_scale", "zero_point": "up_blocks.2.resnets.1.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.2.resnets.1.conv1.input_scale", "zero_point": "up_blocks.2.resnets.1.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.2.resnets.1.conv1.output_scale", "zero_point": "up_blocks.2.resnets.1.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "up_blocks.2.resnets.1.time_emb_proj.weight", "bias": "up_blocks.2.resnets.1.time_emb_proj.bias", "weight_quant": { "scale": "up_blocks.2.resnets.1.time_emb_proj.weight_scale", "zero_point": "up_blocks.2.resnets.1.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.2.resnets.1.time_emb_proj.input_scale", "zero_point": "up_blocks.2.resnets.1.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.2.resnets.1.time_emb_proj.output_scale", "zero_point": "up_blocks.2.resnets.1.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "up_blocks.2.resnets.1.norm2.weight", "bias": "up_blocks.2.resnets.1.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "up_blocks.2.resnets.1.conv2.weight", "bias": "up_blocks.2.resnets.1.conv2.bias", "weight_quant": { "scale": "up_blocks.2.resnets.1.conv2.weight_scale", "zero_point": "up_blocks.2.resnets.1.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.2.resnets.1.conv2.input_scale", "zero_point": "up_blocks.2.resnets.1.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.2.resnets.1.conv2.output_scale", "zero_point": "up_blocks.2.resnets.1.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" }, "conv_shortcut": { "weight": "up_blocks.2.resnets.1.conv_shortcut.weight", "bias": "up_blocks.2.resnets.1.conv_shortcut.bias", "weight_quant": { "scale": "up_blocks.2.resnets.1.conv_shortcut.weight_scale", "zero_point": "up_blocks.2.resnets.1.conv_shortcut.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.2.resnets.1.conv_shortcut.input_scale", "zero_point": "up_blocks.2.resnets.1.conv_shortcut.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.2.resnets.1.conv_shortcut.output_scale", "zero_point": "up_blocks.2.resnets.1.conv_shortcut.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" } }, "2": { "norm1": { "weight": "up_blocks.2.resnets.2.norm1.weight", "bias": "up_blocks.2.resnets.2.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "up_blocks.2.resnets.2.conv1.weight", "bias": "up_blocks.2.resnets.2.conv1.bias", "weight_quant": { "scale": "up_blocks.2.resnets.2.conv1.weight_scale", "zero_point": "up_blocks.2.resnets.2.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.2.resnets.2.conv1.input_scale", "zero_point": "up_blocks.2.resnets.2.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.2.resnets.2.conv1.output_scale", "zero_point": "up_blocks.2.resnets.2.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "up_blocks.2.resnets.2.time_emb_proj.weight", "bias": "up_blocks.2.resnets.2.time_emb_proj.bias", "weight_quant": { "scale": "up_blocks.2.resnets.2.time_emb_proj.weight_scale", "zero_point": "up_blocks.2.resnets.2.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.2.resnets.2.time_emb_proj.input_scale", "zero_point": "up_blocks.2.resnets.2.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.2.resnets.2.time_emb_proj.output_scale", "zero_point": "up_blocks.2.resnets.2.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "up_blocks.2.resnets.2.norm2.weight", "bias": "up_blocks.2.resnets.2.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "up_blocks.2.resnets.2.conv2.weight", "bias": "up_blocks.2.resnets.2.conv2.bias", "weight_quant": { "scale": "up_blocks.2.resnets.2.conv2.weight_scale", "zero_point": "up_blocks.2.resnets.2.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.2.resnets.2.conv2.input_scale", "zero_point": "up_blocks.2.resnets.2.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.2.resnets.2.conv2.output_scale", "zero_point": "up_blocks.2.resnets.2.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" }, "conv_shortcut": { "weight": "up_blocks.2.resnets.2.conv_shortcut.weight", "bias": "up_blocks.2.resnets.2.conv_shortcut.bias", "weight_quant": { "scale": "up_blocks.2.resnets.2.conv_shortcut.weight_scale", "zero_point": "up_blocks.2.resnets.2.conv_shortcut.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "up_blocks.2.resnets.2.conv_shortcut.input_scale", "zero_point": "up_blocks.2.resnets.2.conv_shortcut.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "up_blocks.2.resnets.2.conv_shortcut.output_scale", "zero_point": "up_blocks.2.resnets.2.conv_shortcut.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" } } } } }, "mid_block": { "attentions": { "0": { "norm": { "weight": "mid_block.attentions.0.norm.weight", "bias": "mid_block.attentions.0.norm.bias", "type": "GroupNorm" }, "proj_in": { "weight": "mid_block.attentions.0.proj_in.weight", "bias": "mid_block.attentions.0.proj_in.bias", "weight_quant": { "scale": "mid_block.attentions.0.proj_in.weight_scale", "zero_point": "mid_block.attentions.0.proj_in.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.proj_in.input_scale", "zero_point": "mid_block.attentions.0.proj_in.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.proj_in.output_scale", "zero_point": "mid_block.attentions.0.proj_in.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "transformer_blocks": { "0": { "norm1": { "weight": "mid_block.attentions.0.transformer_blocks.0.norm1.weight", "bias": "mid_block.attentions.0.transformer_blocks.0.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.0.attn1.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn1.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn1.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn1.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.0.attn1.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn1.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn1.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn1.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.0.attn1.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn1.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn1.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn1.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "mid_block.attentions.0.transformer_blocks.0.norm2.weight", "bias": "mid_block.attentions.0.transformer_blocks.0.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.0.attn2.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn2.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn2.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn2.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.0.attn2.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn2.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn2.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn2.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.0.attn2.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn2.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn2.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn2.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "mid_block.attentions.0.transformer_blocks.0.norm3.weight", "bias": "mid_block.attentions.0.transformer_blocks.0.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.weight", "bias": "mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "mid_block.attentions.0.transformer_blocks.0.ff.net.2.weight", "bias": "mid_block.attentions.0.transformer_blocks.0.ff.net.2.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.ff.net.2.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.ff.net.2.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.0.ff.net.2.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.0.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "1": { "norm1": { "weight": "mid_block.attentions.0.transformer_blocks.1.norm1.weight", "bias": "mid_block.attentions.0.transformer_blocks.1.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.1.attn1.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn1.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn1.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn1.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.1.attn1.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn1.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn1.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn1.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.1.attn1.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn1.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn1.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn1.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.1.attn1.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.1.attn1.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn1.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn1.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn1.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "mid_block.attentions.0.transformer_blocks.1.norm2.weight", "bias": "mid_block.attentions.0.transformer_blocks.1.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.1.attn2.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn2.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn2.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn2.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.1.attn2.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn2.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn2.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn2.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.1.attn2.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn2.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn2.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn2.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.1.attn2.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.1.attn2.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn2.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn2.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.attn2.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "mid_block.attentions.0.transformer_blocks.1.norm3.weight", "bias": "mid_block.attentions.0.transformer_blocks.1.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "mid_block.attentions.0.transformer_blocks.1.ff.net.0.proj.weight", "bias": "mid_block.attentions.0.transformer_blocks.1.ff.net.0.proj.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.ff.net.0.proj.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.ff.net.0.proj.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.ff.net.0.proj.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "mid_block.attentions.0.transformer_blocks.1.ff.net.2.weight", "bias": "mid_block.attentions.0.transformer_blocks.1.ff.net.2.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.ff.net.2.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.ff.net.2.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.1.ff.net.2.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.1.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "2": { "norm1": { "weight": "mid_block.attentions.0.transformer_blocks.2.norm1.weight", "bias": "mid_block.attentions.0.transformer_blocks.2.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.2.attn1.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn1.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn1.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn1.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.2.attn1.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn1.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn1.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn1.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.2.attn1.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn1.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn1.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn1.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.2.attn1.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.2.attn1.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn1.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn1.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn1.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "mid_block.attentions.0.transformer_blocks.2.norm2.weight", "bias": "mid_block.attentions.0.transformer_blocks.2.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.2.attn2.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn2.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn2.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn2.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.2.attn2.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn2.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn2.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn2.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.2.attn2.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn2.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn2.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn2.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "mid_block.attentions.0.transformer_blocks.2.norm3.weight", "bias": "mid_block.attentions.0.transformer_blocks.2.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj.weight", "bias": "mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "mid_block.attentions.0.transformer_blocks.2.ff.net.2.weight", "bias": "mid_block.attentions.0.transformer_blocks.2.ff.net.2.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.ff.net.2.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.ff.net.2.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.2.ff.net.2.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.2.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "3": { "norm1": { "weight": "mid_block.attentions.0.transformer_blocks.3.norm1.weight", "bias": "mid_block.attentions.0.transformer_blocks.3.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.3.attn1.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn1.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn1.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn1.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.3.attn1.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn1.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn1.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn1.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.3.attn1.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn1.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn1.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn1.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.3.attn1.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.3.attn1.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn1.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn1.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn1.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "mid_block.attentions.0.transformer_blocks.3.norm2.weight", "bias": "mid_block.attentions.0.transformer_blocks.3.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.3.attn2.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn2.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn2.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn2.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.3.attn2.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn2.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn2.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn2.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.3.attn2.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn2.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn2.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn2.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "mid_block.attentions.0.transformer_blocks.3.norm3.weight", "bias": "mid_block.attentions.0.transformer_blocks.3.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj.weight", "bias": "mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "mid_block.attentions.0.transformer_blocks.3.ff.net.2.weight", "bias": "mid_block.attentions.0.transformer_blocks.3.ff.net.2.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.ff.net.2.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.ff.net.2.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.3.ff.net.2.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.3.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "4": { "norm1": { "weight": "mid_block.attentions.0.transformer_blocks.4.norm1.weight", "bias": "mid_block.attentions.0.transformer_blocks.4.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.4.attn1.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn1.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn1.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn1.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.4.attn1.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn1.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn1.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn1.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.4.attn1.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn1.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn1.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn1.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.4.attn1.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.4.attn1.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn1.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn1.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn1.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "mid_block.attentions.0.transformer_blocks.4.norm2.weight", "bias": "mid_block.attentions.0.transformer_blocks.4.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.4.attn2.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn2.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn2.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn2.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.4.attn2.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn2.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn2.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn2.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.4.attn2.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn2.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn2.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn2.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "mid_block.attentions.0.transformer_blocks.4.norm3.weight", "bias": "mid_block.attentions.0.transformer_blocks.4.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "mid_block.attentions.0.transformer_blocks.4.ff.net.0.proj.weight", "bias": "mid_block.attentions.0.transformer_blocks.4.ff.net.0.proj.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.ff.net.0.proj.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.ff.net.0.proj.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.ff.net.0.proj.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "mid_block.attentions.0.transformer_blocks.4.ff.net.2.weight", "bias": "mid_block.attentions.0.transformer_blocks.4.ff.net.2.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.ff.net.2.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.ff.net.2.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.4.ff.net.2.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.4.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "5": { "norm1": { "weight": "mid_block.attentions.0.transformer_blocks.5.norm1.weight", "bias": "mid_block.attentions.0.transformer_blocks.5.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.5.attn1.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn1.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn1.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn1.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.5.attn1.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn1.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn1.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn1.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.5.attn1.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn1.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn1.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn1.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.5.attn1.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.5.attn1.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn1.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn1.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn1.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "mid_block.attentions.0.transformer_blocks.5.norm2.weight", "bias": "mid_block.attentions.0.transformer_blocks.5.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.5.attn2.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn2.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn2.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn2.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.5.attn2.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn2.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn2.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn2.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.5.attn2.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn2.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn2.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn2.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "mid_block.attentions.0.transformer_blocks.5.norm3.weight", "bias": "mid_block.attentions.0.transformer_blocks.5.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "mid_block.attentions.0.transformer_blocks.5.ff.net.0.proj.weight", "bias": "mid_block.attentions.0.transformer_blocks.5.ff.net.0.proj.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.ff.net.0.proj.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.ff.net.0.proj.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.ff.net.0.proj.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "mid_block.attentions.0.transformer_blocks.5.ff.net.2.weight", "bias": "mid_block.attentions.0.transformer_blocks.5.ff.net.2.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.ff.net.2.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.ff.net.2.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.5.ff.net.2.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.5.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "6": { "norm1": { "weight": "mid_block.attentions.0.transformer_blocks.6.norm1.weight", "bias": "mid_block.attentions.0.transformer_blocks.6.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.6.attn1.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn1.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn1.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn1.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.6.attn1.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn1.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn1.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn1.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.6.attn1.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn1.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn1.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn1.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.6.attn1.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.6.attn1.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn1.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn1.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn1.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "mid_block.attentions.0.transformer_blocks.6.norm2.weight", "bias": "mid_block.attentions.0.transformer_blocks.6.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.6.attn2.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn2.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn2.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn2.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.6.attn2.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn2.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn2.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn2.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.6.attn2.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn2.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn2.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn2.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "mid_block.attentions.0.transformer_blocks.6.norm3.weight", "bias": "mid_block.attentions.0.transformer_blocks.6.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "mid_block.attentions.0.transformer_blocks.6.ff.net.0.proj.weight", "bias": "mid_block.attentions.0.transformer_blocks.6.ff.net.0.proj.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.ff.net.0.proj.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.ff.net.0.proj.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.ff.net.0.proj.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "mid_block.attentions.0.transformer_blocks.6.ff.net.2.weight", "bias": "mid_block.attentions.0.transformer_blocks.6.ff.net.2.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.ff.net.2.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.ff.net.2.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.6.ff.net.2.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.6.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "7": { "norm1": { "weight": "mid_block.attentions.0.transformer_blocks.7.norm1.weight", "bias": "mid_block.attentions.0.transformer_blocks.7.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.7.attn1.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn1.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn1.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn1.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.7.attn1.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn1.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn1.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn1.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.7.attn1.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn1.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn1.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn1.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.7.attn1.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.7.attn1.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn1.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn1.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn1.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "mid_block.attentions.0.transformer_blocks.7.norm2.weight", "bias": "mid_block.attentions.0.transformer_blocks.7.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.7.attn2.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn2.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn2.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn2.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.7.attn2.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn2.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn2.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn2.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.7.attn2.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn2.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn2.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn2.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "mid_block.attentions.0.transformer_blocks.7.norm3.weight", "bias": "mid_block.attentions.0.transformer_blocks.7.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "mid_block.attentions.0.transformer_blocks.7.ff.net.0.proj.weight", "bias": "mid_block.attentions.0.transformer_blocks.7.ff.net.0.proj.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.ff.net.0.proj.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.ff.net.0.proj.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.ff.net.0.proj.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "mid_block.attentions.0.transformer_blocks.7.ff.net.2.weight", "bias": "mid_block.attentions.0.transformer_blocks.7.ff.net.2.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.ff.net.2.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.ff.net.2.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.7.ff.net.2.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.7.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "8": { "norm1": { "weight": "mid_block.attentions.0.transformer_blocks.8.norm1.weight", "bias": "mid_block.attentions.0.transformer_blocks.8.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.8.attn1.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn1.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn1.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn1.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.8.attn1.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn1.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn1.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn1.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.8.attn1.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn1.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn1.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn1.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.8.attn1.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.8.attn1.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn1.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn1.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn1.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "mid_block.attentions.0.transformer_blocks.8.norm2.weight", "bias": "mid_block.attentions.0.transformer_blocks.8.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.8.attn2.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn2.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn2.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn2.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.8.attn2.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn2.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn2.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn2.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.8.attn2.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn2.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn2.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn2.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "mid_block.attentions.0.transformer_blocks.8.norm3.weight", "bias": "mid_block.attentions.0.transformer_blocks.8.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "mid_block.attentions.0.transformer_blocks.8.ff.net.0.proj.weight", "bias": "mid_block.attentions.0.transformer_blocks.8.ff.net.0.proj.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.ff.net.0.proj.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.ff.net.0.proj.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.ff.net.0.proj.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "mid_block.attentions.0.transformer_blocks.8.ff.net.2.weight", "bias": "mid_block.attentions.0.transformer_blocks.8.ff.net.2.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.ff.net.2.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.ff.net.2.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.8.ff.net.2.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.8.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } }, "9": { "norm1": { "weight": "mid_block.attentions.0.transformer_blocks.9.norm1.weight", "bias": "mid_block.attentions.0.transformer_blocks.9.norm1.bias", "type": "LayerNorm" }, "attn1": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.9.attn1.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn1.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn1.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn1.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn1.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn1.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn1.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.9.attn1.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn1.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn1.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn1.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn1.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn1.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn1.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.9.attn1.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn1.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn1.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn1.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn1.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn1.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn1.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.9.attn1.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.9.attn1.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn1.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn1.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn1.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn1.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn1.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn1.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm2": { "weight": "mid_block.attentions.0.transformer_blocks.9.norm2.weight", "bias": "mid_block.attentions.0.transformer_blocks.9.norm2.bias", "type": "LayerNorm" }, "attn2": { "to_q": { "weight": "mid_block.attentions.0.transformer_blocks.9.attn2.to_q.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn2.to_q.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn2.to_q.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn2.to_q.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn2.to_q.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn2.to_q.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn2.to_q.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_k": { "weight": "mid_block.attentions.0.transformer_blocks.9.attn2.to_k.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn2.to_k.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn2.to_k.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn2.to_k.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn2.to_k.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn2.to_k.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn2.to_k.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_v": { "weight": "mid_block.attentions.0.transformer_blocks.9.attn2.to_v.weight", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn2.to_v.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn2.to_v.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn2.to_v.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn2.to_v.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn2.to_v.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn2.to_v.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "to_out": { "0": { "weight": "mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0.weight", "bias": "mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "1": { "type": "Dropout" } } }, "norm3": { "weight": "mid_block.attentions.0.transformer_blocks.9.norm3.weight", "bias": "mid_block.attentions.0.transformer_blocks.9.norm3.bias", "type": "LayerNorm" }, "ff": { "net": { "0": { "proj": { "weight": "mid_block.attentions.0.transformer_blocks.9.ff.net.0.proj.weight", "bias": "mid_block.attentions.0.transformer_blocks.9.ff.net.0.proj.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.ff.net.0.proj.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.ff.net.0.proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.ff.net.0.proj.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.ff.net.0.proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.ff.net.0.proj.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.ff.net.0.proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } }, "1": { "type": "Dropout" }, "2": { "weight": "mid_block.attentions.0.transformer_blocks.9.ff.net.2.weight", "bias": "mid_block.attentions.0.transformer_blocks.9.ff.net.2.bias", "weight_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.ff.net.2.weight_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.ff.net.2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.ff.net.2.input_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.ff.net.2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.transformer_blocks.9.ff.net.2.output_scale", "zero_point": "mid_block.attentions.0.transformer_blocks.9.ff.net.2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } } } }, "proj_out": { "weight": "mid_block.attentions.0.proj_out.weight", "bias": "mid_block.attentions.0.proj_out.bias", "weight_quant": { "scale": "mid_block.attentions.0.proj_out.weight_scale", "zero_point": "mid_block.attentions.0.proj_out.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.attentions.0.proj_out.input_scale", "zero_point": "mid_block.attentions.0.proj_out.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.attentions.0.proj_out.output_scale", "zero_point": "mid_block.attentions.0.proj_out.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" } } }, "resnets": { "0": { "norm1": { "weight": "mid_block.resnets.0.norm1.weight", "bias": "mid_block.resnets.0.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "mid_block.resnets.0.conv1.weight", "bias": "mid_block.resnets.0.conv1.bias", "weight_quant": { "scale": "mid_block.resnets.0.conv1.weight_scale", "zero_point": "mid_block.resnets.0.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.resnets.0.conv1.input_scale", "zero_point": "mid_block.resnets.0.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.resnets.0.conv1.output_scale", "zero_point": "mid_block.resnets.0.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "mid_block.resnets.0.time_emb_proj.weight", "bias": "mid_block.resnets.0.time_emb_proj.bias", "weight_quant": { "scale": "mid_block.resnets.0.time_emb_proj.weight_scale", "zero_point": "mid_block.resnets.0.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.resnets.0.time_emb_proj.input_scale", "zero_point": "mid_block.resnets.0.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.resnets.0.time_emb_proj.output_scale", "zero_point": "mid_block.resnets.0.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "mid_block.resnets.0.norm2.weight", "bias": "mid_block.resnets.0.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "mid_block.resnets.0.conv2.weight", "bias": "mid_block.resnets.0.conv2.bias", "weight_quant": { "scale": "mid_block.resnets.0.conv2.weight_scale", "zero_point": "mid_block.resnets.0.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.resnets.0.conv2.input_scale", "zero_point": "mid_block.resnets.0.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.resnets.0.conv2.output_scale", "zero_point": "mid_block.resnets.0.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" } }, "1": { "norm1": { "weight": "mid_block.resnets.1.norm1.weight", "bias": "mid_block.resnets.1.norm1.bias", "type": "GroupNorm" }, "conv1": { "weight": "mid_block.resnets.1.conv1.weight", "bias": "mid_block.resnets.1.conv1.bias", "weight_quant": { "scale": "mid_block.resnets.1.conv1.weight_scale", "zero_point": "mid_block.resnets.1.conv1.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.resnets.1.conv1.input_scale", "zero_point": "mid_block.resnets.1.conv1.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.resnets.1.conv1.output_scale", "zero_point": "mid_block.resnets.1.conv1.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "time_emb_proj": { "weight": "mid_block.resnets.1.time_emb_proj.weight", "bias": "mid_block.resnets.1.time_emb_proj.bias", "weight_quant": { "scale": "mid_block.resnets.1.time_emb_proj.weight_scale", "zero_point": "mid_block.resnets.1.time_emb_proj.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.resnets.1.time_emb_proj.input_scale", "zero_point": "mid_block.resnets.1.time_emb_proj.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.resnets.1.time_emb_proj.output_scale", "zero_point": "mid_block.resnets.1.time_emb_proj.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantLinear" }, "norm2": { "weight": "mid_block.resnets.1.norm2.weight", "bias": "mid_block.resnets.1.norm2.bias", "type": "GroupNorm" }, "dropout": { "type": "Dropout" }, "conv2": { "weight": "mid_block.resnets.1.conv2.weight", "bias": "mid_block.resnets.1.conv2.bias", "weight_quant": { "scale": "mid_block.resnets.1.conv2.weight_scale", "zero_point": "mid_block.resnets.1.conv2.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "mid_block.resnets.1.conv2.input_scale", "zero_point": "mid_block.resnets.1.conv2.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "mid_block.resnets.1.conv2.output_scale", "zero_point": "mid_block.resnets.1.conv2.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" }, "nonlinearity": { "type": "SiLU" } } } }, "conv_norm_out": { "weight": "conv_norm_out.weight", "bias": "conv_norm_out.bias", "type": "GroupNorm" }, "conv_act": { "type": "SiLU" }, "conv_out": { "weight": "conv_out.weight", "bias": "conv_out.bias", "weight_quant": { "scale": "conv_out.weight_scale", "zero_point": "conv_out.weight_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "input_quant": { "scale": "conv_out.input_scale", "zero_point": "conv_out.input_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "output_quant": { "scale": "conv_out.output_scale", "zero_point": "conv_out.output_zero_point", "dtype": "fp8_e4m3", "qscheme": "per_tensor", "ch_axis": null, "group_size": null, "round_method": null, "scale_type": null }, "type": "QuantizedConv2d" } } }