# zero123-live / taming-transformers / configs / imagenetdepth_vqgan.yaml
# turn-the-cam-anonymous's picture
# adding CLIP taming
# 1ed7deb
# raw
# history blame
# 950 Bytes
# VQGAN configuration for single-channel depth images
# (image_key: depth, in_channels / out_ch: 1) at 256x256 resolution.
#
# Each `target` is a dotted path and the sibling `params` mapping belongs to
# it. NOTE(review): presumably `params` is passed as keyword arguments when
# the training script instantiates `target` -- confirm against the loader.
model:
  base_learning_rate: 4.5e-6
  target: taming.models.vqgan.VQModel
  params:
    embed_dim: 256
    n_embed: 1024
    image_key: depth

    # Settings for the encoder/decoder network.
    ddconfig:
      double_z: false
      z_channels: 256
      resolution: 256
      in_channels: 1
      out_ch: 1
      ch: 128
      ch_mult: [1, 1, 2, 2, 4]  # num_down = len(ch_mult)-1
      num_res_blocks: 2
      attn_resolutions: [16]
      dropout: 0.0

    # Loss settings; disc_in_channels matches the 1-channel model output.
    lossconfig:
      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: false
        disc_in_channels: 1
        # NOTE(review): presumably the training step at which the
        # discriminator is switched on -- confirm in the loss module.
        disc_start: 50001
        disc_weight: 0.75
        codebook_weight: 1.0

# Data module: train and validation datasets, both with size 256.
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 3
    num_workers: 8
    train:
      target: taming.data.imagenet.ImageNetTrainWithDepth
      params:
        size: 256
    validation:
      target: taming.data.imagenet.ImageNetValidationWithDepth
      params:
        size: 256