{
  "model_type": "dinosaur",
  "initializer": {
    "name": "RandomInit",
    "n_slots": 7,
    "dim": 512
  },
  "encoder": {
    "backbone": {
      "name": "TimmExtractor",
      "model": "vit_large_patch14_reg4_dinov2.lvd142m",
      "features": "vit_block12",
      "frozen": true,
      "pretrained": true,
      "model_kwargs": {
        "dynamic_img_size": true
      }
    },
    "output_transform": {
      "name": "networks.two_layer_mlp",
      "inp_dim": 1024,
      "outp_dim": 512,
      "hidden_dim": 2048,
      "layer_norm": true
    }
  },
  "grouper": {
    "name": "SlotAttention",
    "inp_dim": 512,
    "slot_dim": 512,
    "n_iters": 3,
    "use_mlp": true
  },
  "decoder": {
    "name": "MLPDecoder",
    "inp_dim": 512,
    "outp_dim": 1024,
    "hidden_dims": [2048, 2048, 2048],
    "n_patches": 676
  },
  "aux_outputs": false
}