consistency-decoder / config.json
williamberman's picture
vae not pipeline
39f0fcf
{
"_class_name": "ConsistencyDecoderVae",
"_diffusers_version": "0.23.0.dev0",
"block_out_channels": [
128,
256,
512,
512
],
"decoder_args": {
"_use_default_values": [
"downsample_type",
"flip_sin_to_cos",
"mid_block_scale_factor",
"num_class_embeds",
"attention_head_dim",
"sample_size",
"act_fn",
"freq_shift",
"class_embed_type",
"attn_norm_num_groups",
"downsample_padding",
"upsample_type",
"dropout",
"center_input_sample"
],
"act_fn": "silu",
"add_attention": false,
"attention_head_dim": 8,
"attn_norm_num_groups": null,
"block_out_channels": [
320,
640,
1024,
1024
],
"center_input_sample": false,
"class_embed_type": null,
"down_block_types": [
"ResnetDownsampleBlock2D",
"ResnetDownsampleBlock2D",
"ResnetDownsampleBlock2D",
"ResnetDownsampleBlock2D"
],
"downsample_padding": 1,
"downsample_type": "conv",
"dropout": 0.0,
"flip_sin_to_cos": true,
"freq_shift": 0,
"in_channels": 7,
"layers_per_block": 3,
"mid_block_scale_factor": 1,
"norm_eps": 1e-05,
"norm_num_groups": 32,
"num_class_embeds": null,
"num_train_timesteps": 1024,
"out_channels": 6,
"resnet_time_scale_shift": "scale_shift",
"sample_size": null,
"time_embedding_type": "learned",
"up_block_types": [
"ResnetUpsampleBlock2D",
"ResnetUpsampleBlock2D",
"ResnetUpsampleBlock2D",
"ResnetUpsampleBlock2D"
],
"upsample_type": "conv"
},
"encoder_args": {
"act_fn": "silu",
"block_out_channels": [
128,
256,
512,
512
],
"double_z": true,
"down_block_types": [
"DownEncoderBlock2D",
"DownEncoderBlock2D",
"DownEncoderBlock2D",
"DownEncoderBlock2D"
],
"in_channels": 3,
"layers_per_block": 2,
"norm_num_groups": 32,
"out_channels": 4
},
"latent_channels": 4,
"scaling_factor": 0.18215
}