farewellthree commited on
Commit
d5218f5
1 Parent(s): 67efb10

qwen_weight

Browse files
ppllava_qwen7b_llavahound300k/config.json ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/raid/raushan/si-7b",
3
+ "architectures": [
4
+ "LlavaInterleaveForConditionalGeneration"
5
+ ],
6
+ "btadapter": false,
7
+ "btadapter_depth": 4,
8
+ "clip_post_pretrain": null,
9
+ "clip_weight": "google/siglip-so400m-patch14-384",
10
+ "frame_shape": [
11
+ 27,
12
+ 27
13
+ ],
14
+ "hidden_size": 3584,
15
+ "ignore_index": -100,
16
+ "image_grid_pinpoints": [
17
+ [
18
+ 384,
19
+ 384
20
+ ],
21
+ [
22
+ 384,
23
+ 768
24
+ ],
25
+ [
26
+ 384,
27
+ 1152
28
+ ],
29
+ [
30
+ 384,
31
+ 1536
32
+ ],
33
+ [
34
+ 384,
35
+ 1920
36
+ ],
37
+ [
38
+ 384,
39
+ 2304
40
+ ],
41
+ [
42
+ 768,
43
+ 384
44
+ ],
45
+ [
46
+ 768,
47
+ 768
48
+ ],
49
+ [
50
+ 768,
51
+ 1152
52
+ ],
53
+ [
54
+ 768,
55
+ 1536
56
+ ],
57
+ [
58
+ 768,
59
+ 1920
60
+ ],
61
+ [
62
+ 768,
63
+ 2304
64
+ ],
65
+ [
66
+ 1152,
67
+ 384
68
+ ],
69
+ [
70
+ 1152,
71
+ 768
72
+ ],
73
+ [
74
+ 1152,
75
+ 1152
76
+ ],
77
+ [
78
+ 1152,
79
+ 1536
80
+ ],
81
+ [
82
+ 1152,
83
+ 1920
84
+ ],
85
+ [
86
+ 1152,
87
+ 2304
88
+ ],
89
+ [
90
+ 1536,
91
+ 384
92
+ ],
93
+ [
94
+ 1536,
95
+ 768
96
+ ],
97
+ [
98
+ 1536,
99
+ 1152
100
+ ],
101
+ [
102
+ 1536,
103
+ 1536
104
+ ],
105
+ [
106
+ 1536,
107
+ 1920
108
+ ],
109
+ [
110
+ 1536,
111
+ 2304
112
+ ],
113
+ [
114
+ 1920,
115
+ 384
116
+ ],
117
+ [
118
+ 1920,
119
+ 768
120
+ ],
121
+ [
122
+ 1920,
123
+ 1152
124
+ ],
125
+ [
126
+ 1920,
127
+ 1536
128
+ ],
129
+ [
130
+ 1920,
131
+ 1920
132
+ ],
133
+ [
134
+ 1920,
135
+ 2304
136
+ ],
137
+ [
138
+ 2304,
139
+ 384
140
+ ],
141
+ [
142
+ 2304,
143
+ 768
144
+ ],
145
+ [
146
+ 2304,
147
+ 1152
148
+ ],
149
+ [
150
+ 2304,
151
+ 1536
152
+ ],
153
+ [
154
+ 2304,
155
+ 1920
156
+ ],
157
+ [
158
+ 2304,
159
+ 2304
160
+ ]
161
+ ],
162
+ "image_pooling_kernel": [
163
+ 1,
164
+ 3,
165
+ 3
166
+ ],
167
+ "image_pooling_stride": [
168
+ 1,
169
+ 3,
170
+ 3
171
+ ],
172
+ "image_token_index": 151646,
173
+ "long_clip": true,
174
+ "max_T": 64,
175
+ "model_type": "llava_onevision",
176
+ "pad_token_id": 151643,
177
+ "pooling": "clipST_3d",
178
+ "pooling_kernel": [
179
+ 2,
180
+ 3,
181
+ 3
182
+ ],
183
+ "pooling_stride": [
184
+ 2,
185
+ 3,
186
+ 3
187
+ ],
188
+ "pooling_temp": 0.01,
189
+ "projector_hidden_act": "gelu",
190
+ "qwen": true,
191
+ "text_config": {
192
+ "_name_or_path": "Qwen/Qwen2-7B-Instruct",
193
+ "architectures": [
194
+ "Qwen2ForCausalLM"
195
+ ],
196
+ "bos_token_id": 151643,
197
+ "eos_token_id": 151645,
198
+ "hidden_size": 3584,
199
+ "intermediate_size": 18944,
200
+ "model_type": "qwen2",
201
+ "num_attention_heads": 28,
202
+ "num_hidden_layers": 28,
203
+ "num_key_value_heads": 4,
204
+ "rope_theta": 1000000.0,
205
+ "torch_dtype": "bfloat16",
206
+ "vocab_size": 152128
207
+ },
208
+ "tie_word_embeddings": false,
209
+ "torch_dtype": "bfloat16",
210
+ "transformers_version": "4.45.2",
211
+ "use_image_newline_parameter": true,
212
+ "video_token_index": 151647,
213
+ "vision_aspect_ratio": "anyres_max_9",
214
+ "vision_config": {
215
+ "hidden_size": 1152,
216
+ "image_size": 384,
217
+ "intermediate_size": 4304,
218
+ "model_type": "siglip_vision_model",
219
+ "num_attention_heads": 16,
220
+ "num_hidden_layers": 26,
221
+ "patch_size": 14,
222
+ "vision_use_head": false
223
+ },
224
+ "vision_feature_layer": -1,
225
+ "vision_feature_select_strategy": "full"
226
+ }
ppllava_qwen7b_llavahound300k/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 151643,
4
+ "eos_token_id": 151645,
5
+ "pad_token_id": 151643,
6
+ "transformers_version": "4.45.2"
7
+ }
ppllava_qwen7b_llavahound300k/model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da333b38bc421c35f4ce1b93a2327a4ab5ae25c7058afb04cf2d8bfc952c28e5
3
+ size 4909741188
ppllava_qwen7b_llavahound300k/model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d78689f8c0c153346fc4f0ac48e1b529672974069da747abf796570c8c35941
3
+ size 4991497768
ppllava_qwen7b_llavahound300k/model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe2b68e0bb66798772203dd2350b6d6a9940b91649a1f5d507f19150e417ff69
3
+ size 4932752872
ppllava_qwen7b_llavahound300k/model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf32964185a428b622027f189af55280767ec4d30281df2efbb3894d5c18ba40
3
+ size 2158500536
ppllava_qwen7b_llavahound300k/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
ppllava_qwen7b_llavahound300k/runs/Oct23_12-32-41_bore-prod-ruyangliu-20241022110203-worker-0/events.out.tfevents.1729657992.bore-prod-ruyangliu-20241022110203-worker-0.771139.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7828017dc98626eb87ab305ef43f0d6525d106dd64ece11f13fdcf5b33fae57f
3
+ size 12109
ppllava_qwen7b_llavahound300k/trainer_state.json ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9997490589711417,
5
+ "eval_steps": 500,
6
+ "global_step": 996,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.050188205771643665,
13
+ "grad_norm": 2.3202156020973708,
14
+ "learning_rate": 9.989427142584392e-06,
15
+ "loss": 1.5003,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 0.10037641154328733,
20
+ "grad_norm": 2.314291595101664,
21
+ "learning_rate": 9.870995413367397e-06,
22
+ "loss": 1.3883,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.15056461731493098,
27
+ "grad_norm": 2.314931158603531,
28
+ "learning_rate": 9.624050979896533e-06,
29
+ "loss": 1.3754,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 0.20075282308657466,
34
+ "grad_norm": 2.152003211136021,
35
+ "learning_rate": 9.255109039631998e-06,
36
+ "loss": 1.3628,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 0.25094102885821834,
41
+ "grad_norm": 2.1047973694759157,
42
+ "learning_rate": 8.773903481118611e-06,
43
+ "loss": 1.3543,
44
+ "step": 250
45
+ },
46
+ {
47
+ "epoch": 0.30112923462986196,
48
+ "grad_norm": 2.1025984809300318,
49
+ "learning_rate": 8.193130072341872e-06,
50
+ "loss": 1.3515,
51
+ "step": 300
52
+ },
53
+ {
54
+ "epoch": 0.35131744040150564,
55
+ "grad_norm": 2.2036136599303076,
56
+ "learning_rate": 7.528111505069428e-06,
57
+ "loss": 1.3419,
58
+ "step": 350
59
+ },
60
+ {
61
+ "epoch": 0.4015056461731493,
62
+ "grad_norm": 2.1361038466937563,
63
+ "learning_rate": 6.796393132397829e-06,
64
+ "loss": 1.3361,
65
+ "step": 400
66
+ },
67
+ {
68
+ "epoch": 0.451693851944793,
69
+ "grad_norm": 2.1253080705715175,
70
+ "learning_rate": 6.0172800652631706e-06,
71
+ "loss": 1.3336,
72
+ "step": 450
73
+ },
74
+ {
75
+ "epoch": 0.5018820577164367,
76
+ "grad_norm": 2.094902290851612,
77
+ "learning_rate": 5.211327840815459e-06,
78
+ "loss": 1.321,
79
+ "step": 500
80
+ },
81
+ {
82
+ "epoch": 0.5520702634880803,
83
+ "grad_norm": 2.086008706466924,
84
+ "learning_rate": 4.399800100481858e-06,
85
+ "loss": 1.3173,
86
+ "step": 550
87
+ },
88
+ {
89
+ "epoch": 0.6022584692597239,
90
+ "grad_norm": 2.084376290248031,
91
+ "learning_rate": 3.6041075859356383e-06,
92
+ "loss": 1.3044,
93
+ "step": 600
94
+ },
95
+ {
96
+ "epoch": 0.6524466750313677,
97
+ "grad_norm": 2.1873617946017525,
98
+ "learning_rate": 2.845243254082134e-06,
99
+ "loss": 1.3029,
100
+ "step": 650
101
+ },
102
+ {
103
+ "epoch": 0.7026348808030113,
104
+ "grad_norm": 2.1539300794808733,
105
+ "learning_rate": 2.1432284145659104e-06,
106
+ "loss": 1.2977,
107
+ "step": 700
108
+ },
109
+ {
110
+ "epoch": 0.7528230865746549,
111
+ "grad_norm": 2.100942527337154,
112
+ "learning_rate": 1.5165845024934366e-06,
113
+ "loss": 1.3072,
114
+ "step": 750
115
+ },
116
+ {
117
+ "epoch": 0.8030112923462986,
118
+ "grad_norm": 2.2004509246977477,
119
+ "learning_rate": 9.81844422725109e-07,
120
+ "loss": 1.3014,
121
+ "step": 800
122
+ },
123
+ {
124
+ "epoch": 0.8531994981179423,
125
+ "grad_norm": 2.148051612184209,
126
+ "learning_rate": 5.531163580638483e-07,
127
+ "loss": 1.2948,
128
+ "step": 850
129
+ },
130
+ {
131
+ "epoch": 0.903387703889586,
132
+ "grad_norm": 2.0857628452747248,
133
+ "learning_rate": 2.417115494991107e-07,
134
+ "loss": 1.2925,
135
+ "step": 900
136
+ },
137
+ {
138
+ "epoch": 0.9535759096612296,
139
+ "grad_norm": 2.1477939821041936,
140
+ "learning_rate": 5.584586887435739e-08,
141
+ "loss": 1.2951,
142
+ "step": 950
143
+ },
144
+ {
145
+ "epoch": 0.9997490589711417,
146
+ "step": 996,
147
+ "total_flos": 3245135674474496.0,
148
+ "train_loss": 1.3331998112690018,
149
+ "train_runtime": 34717.8895,
150
+ "train_samples_per_second": 7.345,
151
+ "train_steps_per_second": 0.029
152
+ }
153
+ ],
154
+ "logging_steps": 50,
155
+ "max_steps": 996,
156
+ "num_input_tokens_seen": 0,
157
+ "num_train_epochs": 1,
158
+ "save_steps": 420,
159
+ "stateful_callbacks": {
160
+ "TrainerControl": {
161
+ "args": {
162
+ "should_epoch_stop": false,
163
+ "should_evaluate": false,
164
+ "should_log": false,
165
+ "should_save": true,
166
+ "should_training_stop": true
167
+ },
168
+ "attributes": {}
169
+ }
170
+ },
171
+ "total_flos": 3245135674474496.0,
172
+ "train_batch_size": 4,
173
+ "trial_name": null,
174
+ "trial_params": null
175
+ }
ppllava_qwen7b_llavahound300k/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f7cc6ad8ff8f49f3b95141d4510548fa0bfdec4021e61ae08fc3cf7bfb8ae0c
3
+ size 7352