File size: 680 Bytes
04e9878
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
dataset:
  tokenizer_type: CharBPE
  context_length: 64
  image_resolution: 256

stage1:
  type: vqgan
  embed_dim: 256
  n_embed: 16384
  hparams:
    double_z: False
    z_channels: 256
    resolution: 256
    in_channels: 3
    out_ch: 3
    ch: 128
    ch_mult: [1, 1, 2, 2, 4]
    num_res_blocks: 2
    attn_resolutions: [16]
    pdrop: 0.0

stage2:
  type: transformer1d
  vocab_size_txt: 16384
  vocab_size_img: 16384
  hparams:
    embed_dim: 1536
    n_layers: 42
    n_heads: 24
    n_dense_layers: 42
    ctx_len_img: 256
    ctx_len_txt: 64
    embd_pdrop: 0.0
    resid_pdrop: 0.0
    attn_pdrop: 0.0
    mlp_bias: True
    attn_bias: True
    gelu_use_approx: False