RamAnanth1 committed on
Commit
220bb8d
•
1 Parent(s): b6a5660

Create model_adapter_depth_config.yaml

Browse files
Files changed (1) hide show
  1. model_adapter_depth_config.yaml +89 -0
model_adapter_depth_config.yaml ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ target: lvdm.models.ddpm3d.T2VAdapterDepth
3
+ params:
4
+ linear_start: 0.00085
5
+ linear_end: 0.012
6
+ num_timesteps_cond: 1
7
+ log_every_t: 200
8
+ timesteps: 1000
9
+ first_stage_key: video
10
+ cond_stage_key: caption
11
+ image_size:
12
+ - 32
13
+ - 32
14
+ video_length: 16
15
+ channels: 4
16
+ cond_stage_trainable: false
17
+ conditioning_key: crossattn
18
+ scale_by_std: false
19
+ scale_factor: 0.18215
20
+
21
+ unet_config:
22
+ target: lvdm.models.modules.openaimodel3d.UNetModel
23
+ params:
24
+ image_size: 32
25
+ in_channels: 4
26
+ out_channels: 4
27
+ model_channels: 320
28
+ attention_resolutions:
29
+ - 4
30
+ - 2
31
+ - 1
32
+ num_res_blocks: 2
33
+ channel_mult:
34
+ - 1
35
+ - 2
36
+ - 4
37
+ - 4
38
+ num_heads: 8
39
+ transformer_depth: 1
40
+ context_dim: 768
41
+ use_checkpoint: true
42
+ legacy: false
43
+ kernel_size_t: 1
44
+ padding_t: 0
45
+ temporal_length: 16
46
+ use_relative_position: true
47
+
48
+ first_stage_config:
49
+ target: lvdm.models.autoencoder.AutoencoderKL
50
+ params:
51
+ embed_dim: 4
52
+ monitor: val/rec_loss
53
+ ddconfig:
54
+ double_z: true
55
+ z_channels: 4
56
+ resolution: 256
57
+ in_channels: 3
58
+ out_ch: 3
59
+ ch: 128
60
+ ch_mult:
61
+ - 1
62
+ - 2
63
+ - 4
64
+ - 4
65
+ num_res_blocks: 2
66
+ attn_resolutions: []
67
+ dropout: 0.0
68
+ lossconfig:
69
+ target: torch.nn.Identity
70
+
71
+ cond_stage_config:
72
+ target: lvdm.models.modules.condition_modules.FrozenCLIPEmbedder
73
+
74
+ depth_stage_config:
75
+ target: extralibs.midas.api.MiDaSInference
76
+ params:
77
+ model_type: "dpt_hybrid"
78
+ model_path: models/adapter_t2v_depth/dpt_hybrid-midas.pt
79
+
80
+ adapter_config:
81
+ target: lvdm.models.modules.adapter.Adapter
82
+ cond_name: depth
83
+ params:
84
+ cin: 64
85
+ channels: [320, 640, 1280, 1280]
86
+ nums_rb: 2
87
+ ksize: 1
88
+ sk: True
89
+ use_conv: False