# Flat mapping of model hyperparameters, one key per line.
# NOTE(review): `_target_` looks like a Hydra-style instantiation key; the
# placeholder value suggests the consumer overrides or ignores it — confirm.
_target_: ignore_this_field

# Network size.
# NOTE(review): the `_x` / `_y` suffixes presumably refer to two separate
# streams with different widths — confirm against the consuming model class.
depth: 48
patch_size: 2
num_heads: 24
hidden_size_x: 3072
hidden_size_y: 1536
mlp_ratio_x: 4.0
mlp_ratio_y: 4.0

# Input / output.
learn_sigma: false
in_channels: 12
clip_feat_dim: 2048

# Attention and projection options.
qk_norm: true
qkv_bias: false
out_bias: true
attn_drop: 0.0
patch_embed_bias: true
posenc_preserve_area: true
timestep_mlp_bias: true
pooled_caption_mlp_bias: true
attend_to_padding: false

# Conditioning.
timestep_scale: 1000.0
use_t5: true
t5_feat_dim: 4096
t5_token_length: 256

# Positional encoding and backend.
rope_theta: 10000.0
use_transformer_engine: true