{ "d_model": 2048, "decoder_attention_heads": 32, "decoder_ffn_dim": 4096, "decoder_layers": 24, "decoder_start_token_id": 16384, "encoder_attention_heads": 32, "encoder_ffn_dim": 4096, "encoder_layers": 24, "encoder_vocab_size": 50272, "image_length": 256, "image_vocab_size": 16415, "max_text_length": 64 }