# Flat key/value configuration: every entry is a top-level scalar.
# The stray `|` characters that previously wrapped each line have been
# removed. They made the values parse as strings such as "48 |", or made the
# whole document a single literal block scalar, instead of a mapping of
# typed values.

# NOTE(review): `_target_` looks like a Hydra-style instantiate key and the
# value reads as a placeholder; confirm how the consumer treats it.
_target_: ignore_this_field

# Model dimensions. The `_x` / `_y` suffixes presumably refer to two
# separate streams; verify against the consuming model code.
depth: 48
patch_size: 2
num_heads: 24
hidden_size_x: 3072
hidden_size_y: 1536
mlp_ratio_x: 4.0
mlp_ratio_y: 4.0
learn_sigma: false
in_channels: 12
clip_feat_dim: 2048

# Attention and bias switches.
qk_norm: true
qkv_bias: false
out_bias: true
attn_drop: 0.0
patch_embed_bias: true
posenc_preserve_area: true
timestep_mlp_bias: true
pooled_caption_mlp_bias: true
attend_to_padding: false
timestep_scale: 1000.0

# T5 settings. NOTE(review): presumably only read when `use_t5` is true;
# confirm.
use_t5: true
t5_feat_dim: 4096
t5_token_length: 256

rope_theta: 10000.0
use_transformer_engine: true