# Page header residue (not part of the config): arxyzan's picture · Hezar: Upload model_config.yaml · b9289bf
# Model configuration for `vit_roberta_image2text`.
#
# NOTE(review): the nesting below was reconstructed from the section keys
# (`encoder:`, `decoder:`, `generation:`) and the key groups that follow them.
# With every key at column 0 the three section keys parse as null and their
# children collide as duplicate top-level keys. `generation` is assumed to be
# a top-level sibling of `encoder`/`decoder` -- confirm against the consumer's
# schema.
name: vit_roberta_image2text
config_type: model

# Encoder sub-config. Keys such as image_size / patch_size / num_channels
# suggest an image encoder (presumably the "vit" part of the model name --
# confirm).
encoder:
  name: null
  config_type: model
  hidden_size: 768
  num_hidden_layers: 12
  num_attention_heads: 12
  intermediate_size: 3072
  hidden_act: gelu
  hidden_dropout_prob: 0.0
  attention_probs_dropout_prob: 0.0
  initializer_range: 0.02
  layer_norm_eps: 1.0e-12
  image_size: 224
  patch_size: 16
  num_channels: 3
  qkv_bias: true
  encoder_stride: 16

# Decoder sub-config: flagged as a decoder with cross-attention enabled
# (presumably the "roberta" part of the model name -- confirm).
# hidden_size matches the encoder's (768).
decoder:
  name: null
  config_type: model
  is_decoder: true
  add_cross_attention: true
  attention_probs_dropout_prob: 0.1
  bos_token_id: 0
  eos_token_id: 2
  classifier_dropout: null
  gradient_checkpointing: false
  hidden_act: gelu
  hidden_dropout_prob: 0.1
  hidden_size: 768
  initializer_range: 0.02
  intermediate_size: 3072
  layer_norm_eps: 1.0e-12
  max_position_embeddings: 514
  num_attention_heads: 12
  num_hidden_layers: 12
  pad_token_id: 1
  position_embedding_type: absolute
  type_vocab_size: 1
  use_cache: true
  vocab_size: 42000

# Text-generation settings (beam count, length limits, special token ids).
generation:
  bos_token_id: 0
  decoder_start_token_id: 0
  early_stopping: true
  eos_token_id: 2
  length_penalty: 2.0
  max_length: 64
  no_repeat_ngram_size: 3
  num_beams: 4
  # NOTE(review): pad_token_id is 2 here (same as eos_token_id) but 1 in the
  # decoder section above -- confirm the difference is intentional.
  pad_token_id: 2