{
  "architectures": [
    "LlavaForConditionalGeneration"
  ],
  "ignore_index": -100,
  "image_token_index": 32000,
  "model_type": "llava",
  "pad_token_id": 32001,
  "projector_hidden_act": "gelu",
  "text_config": {
    "_name_or_path": "/content/llama2",
    "architectures": [
      "LlamaForCausalLM"
    ],
    "max_position_embeddings": 4096,
    "model_type": "llama",
    "pad_token_id": 0,
    "torch_dtype": "float16",
    "vocab_size": 32064
  },
  "torch_dtype": "bfloat16",
  "transformers_version": "4.41.2",
  "vision_config": {
    "architectures": [
      "DonutSwinModel"
    ],
    "attention_probs_dropout_prob": 0.0,
    "depths": [
      2,
      2,
      14,
      2
    ],
    "drop_path_rate": 0.1,
    "embed_dim": 128,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.0,
    "hidden_size": 1024,
    "image_size": [
      512,
      512
    ],
    "mlp_ratio": 4.0,
    "model_type": "donut-swin",
    "num_heads": [
      4,
      8,
      16,
      32
    ],
    "num_layers": 4,
    "patch_size": 4,
    "path_norm": true,
    "qkv_bias": true,
    "torch_dtype": "float32",
    "use_absolute_embeddings": false,
    "window_size": 10
  },
  "vision_feature_layer": -1,
  "vision_feature_select_strategy": "default"
}