---
# Run configuration: dataset windowing, inference windowing, network
# hyperparameters, and validation-dataset settings.
#
# NOTE(review): this file was previously collapsed onto a single line, which is
# not valid YAML. The nesting below was reconstructed from the alphabetical key
# order at each level -- confirm against the tool that generated the file.

dataset_kwargs:
  window_size: 512
  window_stride: 384

debug: false

inference_kwargs:
  im_size: 1024
  window_size: 512
  window_stride: 256

net_kwargs:
  backbone: dino_vits16
  d_model: 384
  decoder:
    drop_path_rate: 0.0
    dropout: 0.1
    n_cls: 31
    n_layers: 1
    name: mask_transformer
  distilled: false
  drop_path_rate: 0.1
  dropout: 0.0
  # The python-specific tag makes this load as a tuple rather than a list.
  # It is rejected by safe loaders (e.g. yaml.safe_load); kept as-is so the
  # loaded type does not change for the consumer.
  # NOTE(review): 256x256 here differs from the 512 window / 1024 image sizes
  # used elsewhere in this file -- presumably intentional; confirm.
  image_size: !!python/tuple
    - 256
    - 256
  n_cls: 31
  n_heads: 3
  n_layers: 12
  normalization: deit
  patch_size: 16

val_dataset_kwargs:
  batch_size: 1
  crop_size: 512
  dataset: cityscapes
  image_size: 1024
  normalization: deit
  num_workers: 10
  split: val