# Keyword-argument groups read by the consuming pipeline (consumer not visible
# in this file). Structure was reconstructed from a flattened copy in which
# indentation had been lost.

# Extra keyword arguments for the UNet.
unet_additional_kwargs:
  unet_use_cross_frame_attention: false
  unet_use_temporal_attention: false
  use_motion_module: true
  motion_module_resolutions: [1, 2, 4, 8]
  motion_module_mid_block: false
  motion_module_decoder_only: false
  motion_module_type: "Vanilla"

  # NOTE(review): assumed to be nested under `unet_additional_kwargs` because
  # it follows `motion_module_type` and shares the `motion_module_` prefix;
  # confirm against the code that loads this file.
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    attention_block_types: ["Temporal_Self", "Temporal_Self"]
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 24
    temporal_attention_dim_div: 1

# Keyword arguments for the noise scheduler.
# NOTE(review): presumably forwarded verbatim to the scheduler constructor;
# verify the accepted parameter names against the scheduler in use.
noise_scheduler_kwargs:
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: "linear"
  steps_offset: 1
  clip_sample: false