{
  "model": {
    "type": "image_transformer_v2",
    "input_channels": 3,
    "input_size": [256, 256],
    "patch_size": [2, 2],
    "depths": [2, 4, 4, 8],
    "widths": [128, 256, 512, 1024],
    "self_attns": [
      {"type": "neighborhood", "d_head": 64, "kernel_size": 3},
      {"type": "neighborhood", "d_head": 64, "kernel_size": 3},
      {"type": "neighborhood", "d_head": 64, "kernel_size": 3},
      {"type": "global", "d_head": 64}
    ],
    "loss_config": "karras",
    "loss_weighting": "soft-min-snr",
    "dropout_rate": [0.0, 0.0, 0.0, 0.0],
    "mapping_dropout_rate": 0.0,
    "augment_prob": 0.0,
    "sigma_data": 0.5,
    "sigma_min": 1e-2,
    "sigma_max": 160,
    "sigma_sample_density": {
      "type": "cosine-interpolated"
    }
  },
  "dataset": {
    "type": "huggingface",
    "location": "woctordho/img-256-danbooru",
    "image_key": "image"
  },
  "optimizer": {
    "type": "adam8bit",
    "lr": 5e-4,
    "betas": [0.9, 0.95],
    "eps": 1e-8,
    "weight_decay": 1e-3
  },
  "lr_sched": {
    "type": "constant",
    "warmup": 0.0
  },
  "ema_sched": {
    "type": "inverse",
    "power": 0.75,
    "max_value": 0.9999
  }
}