# Model configuration with three top-level sections:
#   transformer_additional_kwargs, vae_kwargs, text_encoder_kwargs.
#
# NOTE(review): this file's block structure was reconstructed from a copy in
# which all line breaks and indentation had been lost. The placement of
# motion_module_kwargs_even / motion_module_kwargs_odd under
# transformer_additional_kwargs is inferred from key order - confirm against
# the code that reads this file.

# Settings for the transformer selected by transformer_type.
transformer_additional_kwargs:
  transformer_type: "Transformer3DModel"
  patch_3d: false
  fake_3d: false
  basic_block_type: "global_motionmodule"
  time_position_encoding_before_transformer: false
  motion_module_type: "Vanilla"
  enable_uvit: true

  # Motion-module settings, "even" variant.
  # presumably applied to even-indexed blocks - TODO confirm with the consumer.
  motion_module_kwargs_even:
    num_attention_heads: 16
    num_transformer_block: 1
    attention_block_types: ["Temporal_Self", "Temporal_Self"]
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 4096
    temporal_attention_dim_div: 1
    block_size: 1
    remove_time_embedding_in_photo: false

  # Motion-module settings, "odd" variant. Same values as the "even" variant
  # except that the second attention block type is "Global_Self".
  motion_module_kwargs_odd:
    num_attention_heads: 16
    num_transformer_block: 1
    attention_block_types: ["Temporal_Self", "Global_Self"]
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 4096
    temporal_attention_dim_div: 1
    block_size: 1
    remove_time_embedding_in_photo: false

# Settings for the VAE selected by vae_type.
vae_kwargs:
  vae_type: "AutoencoderKLMagvit"
  mini_batch_encoder: 8
  mini_batch_decoder: 2
  slice_mag_vae: false
  slice_compression_vae: true
  cache_compression_vae: false
  cache_mag_vae: false

# Settings for the text encoder.
text_encoder_kwargs:
  enable_multi_text_encoder: false