{ "bottleneck": { "args": { "bottleneck_dim": 16, "norm": "none", "regularizer": { "args": { "codebook_loss_weight": 1.0, "codebook_size": 8192, "commitment_loss_weight": 0.25, "entropy_loss_temperature": 0.01, "entropy_loss_weight": 0.0, "l2_normalized": true, "stochastic": true, "stochastic_temperature": 0.03 }, "name": "vq" } }, "name": "bottleneck" }, "bottleneck_token_num": 1024, "decoder_depth": 12, "decoder_hidden_size": 768, "decoder_name": "none", "decoder_num_heads": 12, "decoder_patch_size": 8, "decoder_temporal_patch_size": 4, "encoder_depth": 12, "encoder_hidden_size": 768, "encoder_name": "none", "encoder_num_heads": 12, "encoder_query_gaussian_init": true, "frame_num": 16, "in_channels": 3, "input_size": 128, "latent_pe_scale_factor": 10000, "learned_decoder_latent_pe": false, "learned_decoder_patch_query_embed": false, "learned_encoder_latent_query_embed": true, "learned_encoder_patch_pe": false, "patch_size": 8, "prior_model": { "args": { "l2_normalized": true }, "avg_loss_over_rounds": true, "latent_ce_temperature": 1.0, "mix_ss_max_ratio": 0.5, "mix_ss_peak_steps_ratio": 0.3, "n_rounds": 2, "name": "gptc-S", "no_dropout": false, "no_grad_before_last_round": false, "use_mix_ss": true }, "query_init_std": 0.02, "temporal_patch_size": 4, "transformer_name": "transformer_encoder_parallel", "use_decoder_latent_token_type_embed": false, "use_decoder_patch_query_token_type_embed": true, "use_encoder_latent_query_token_type_embed": false, "use_encoder_patch_token_type_embed": false }