{
  "architectures": [
    "STDiT3"
  ],
  "caption_channels": 4096,
  "class_dropout_prob": 0.1,
  "depth": 28,
  "drop_path": 0.0,
  "enable_flash_attn": true,
  "enable_layernorm_kernel": true,
  "enable_sequence_parallelism": false,
  "freeze_y_embedder": true,
  "hidden_size": 1152,
  "in_channels": 4,
  "input_size": [
    null,
    null,
    null
  ],
  "input_sq_size": 512,
  "mlp_ratio": 4.0,
  "model_max_length": 300,
  "model_type": "STDiT3",
  "num_heads": 16,
  "only_train_temporal": false,
  "patch_size": [
    1,
    2,
    2
  ],
  "pred_sigma": true,
  "qk_norm": true,
  "skip_y_embedder": false,
  "torch_dtype": "float32",
  "transformers_version": "4.36.2"
}