arjunanand13 commited on
Commit
42c212c
1 Parent(s): 316aed3

Create config.json

Browse files
Files changed (1) hide show
  1. config.json +68 -0
config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/Florence-2-base-ft",
3
+ "architectures": ["Florence2ForConditionalGeneration"],
4
+ "auto_map": {
5
+ "AutoConfig": "configuration_florence2.Florence2Config",
6
+ "AutoModelForCausalLM": "modeling_florence2.Florence2ForConditionalGeneration"
7
+ },
8
+ "bos_token_id": 2,
9
+ "eos_token_id": 1,
10
+ "ignore_index": -100,
11
+ "is_encoder_decoder": true,
12
+ "model_type": "florence2",
13
+ "pad_token_id": 0,
14
+ "projection_dim": 768,
15
+ "vocab_size": 51289,
16
 + "torch_dtype": "float32",
17
+ "text_config": {
18
+ "activation_dropout": 0.1,
19
+ "activation_function": "gelu",
20
+ "attention_dropout": 0.1,
21
+ "bos_token_id": 2,
22
+ "decoder_attention_heads": 12,
23
+ "decoder_ffn_dim": 3072,
24
+ "decoder_layers": 6,
25
+ "d_model": 768,
26
+ "encoder_attention_heads": 12,
27
+ "encoder_ffn_dim": 3072,
28
+ "encoder_layers": 6,
29
+ "eos_token_id": 1,
30
+ "forced_bos_token_id": 2,
31
+ "forced_eos_token_id": 1,
32
+ "num_beams": 3,
33
+ "no_repeat_ngram_size": 3,
34
+ "dropout": 0.1,
35
+ "label2id": {
36
+ "LABEL_0": 0,
37
+ "LABEL_1": 1,
38
+ "LABEL_2": 2
39
+ },
40
+ "max_position_embeddings": 1024,
41
+ "is_encoder_decoder": true,
42
+ "pad_token_id": 0
43
+ },
44
+ "vision_config": {
45
+ "model_type": "davit",
46
+ "drop_path_rate": 0.1,
47
+ "patch_size": [7, 3, 3, 3],
48
+ "patch_stride": [4, 2, 2, 2],
49
+ "patch_padding": [3, 1, 1, 1],
50
+ "patch_prenorm": [false, true, true, true],
51
+ "dim_embed": [128, 256, 512, 1024],
52
+ "num_heads": [4, 8, 16, 32],
53
+ "num_groups": [4, 8, 16, 32],
54
+ "depths": [1, 1, 9, 1],
55
+ "window_size": 12,
56
+ "projection_dim": 768,
57
+ "visual_temporal_embedding": {
58
+ "type": "COSINE",
59
+ "max_temporal_embeddings": 100
60
+ },
61
+ "image_pos_embed": {
62
+ "type": "learned_abs_2d",
63
+ "max_pos_embeddings": 50
64
+ },
65
+ "image_feature_source": ["spatial_avg_pool", "temporal_avg_pool"]
66
+ },
67
+ "transformers_version": "4.41.0.dev0"
68
+ }