Update config.json
Browse files
- config.json +6 -6
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "./checkpoints/
|
3 |
"architectures": [
|
4 |
"LlavaLlamaModel"
|
5 |
],
|
@@ -11,7 +11,7 @@
|
|
11 |
"image_aspect_ratio": "resize",
|
12 |
"interpolate_mode": "linear",
|
13 |
"llm_cfg": {
|
14 |
-
"_name_or_path": "./checkpoints/
|
15 |
"add_cross_attention": false,
|
16 |
"architectures": [
|
17 |
"LlamaForCausalLM"
|
@@ -96,7 +96,7 @@
|
|
96 |
},
|
97 |
"mm_hidden_size": 1152,
|
98 |
"mm_projector_cfg": {
|
99 |
-
"_name_or_path": "./checkpoints/
|
100 |
"add_cross_attention": false,
|
101 |
"architectures": [
|
102 |
"MultimodalProjector"
|
@@ -170,7 +170,7 @@
|
|
170 |
"model_type": "llava_llama",
|
171 |
"num_video_frames": 8,
|
172 |
"region_extractor_cfg": {
|
173 |
-
"_name_or_path": "./checkpoints/
|
174 |
"add_cross_attention": false,
|
175 |
"architectures": [
|
176 |
"RegionExtractor"
|
@@ -235,7 +235,7 @@
|
|
235 |
"typical_p": 1.0,
|
236 |
"use_bfloat16": false
|
237 |
},
|
238 |
-
"resume_path": "./checkpoints/
|
239 |
"s2": false,
|
240 |
"s2_max_split_size": 336,
|
241 |
"s2_scales": "336,672,1008",
|
@@ -246,7 +246,7 @@
|
|
246 |
"tune_vision_tower": true,
|
247 |
"vision_resolution": -1,
|
248 |
"vision_tower_cfg": {
|
249 |
-
"_name_or_path": "./checkpoints/
|
250 |
"add_cross_attention": false,
|
251 |
"architectures": [
|
252 |
"SiglipVisionModel"
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "./checkpoints/SpatialRGPT-VILA1.5-8B",
|
3 |
"architectures": [
|
4 |
"LlavaLlamaModel"
|
5 |
],
|
|
|
11 |
"image_aspect_ratio": "resize",
|
12 |
"interpolate_mode": "linear",
|
13 |
"llm_cfg": {
|
14 |
+
"_name_or_path": "./checkpoints/SpatialRGPT-VILA1.5-8B/llm",
|
15 |
"add_cross_attention": false,
|
16 |
"architectures": [
|
17 |
"LlamaForCausalLM"
|
|
|
96 |
},
|
97 |
"mm_hidden_size": 1152,
|
98 |
"mm_projector_cfg": {
|
99 |
+
"_name_or_path": "./checkpoints/SpatialRGPT-VILA1.5-8B/mm_projector",
|
100 |
"add_cross_attention": false,
|
101 |
"architectures": [
|
102 |
"MultimodalProjector"
|
|
|
170 |
"model_type": "llava_llama",
|
171 |
"num_video_frames": 8,
|
172 |
"region_extractor_cfg": {
|
173 |
+
"_name_or_path": "./checkpoints/SpatialRGPT-VILA1.5-8B/region_extractor",
|
174 |
"add_cross_attention": false,
|
175 |
"architectures": [
|
176 |
"RegionExtractor"
|
|
|
235 |
"typical_p": 1.0,
|
236 |
"use_bfloat16": false
|
237 |
},
|
238 |
+
"resume_path": "./checkpoints/SpatialRGPT-VILA1.5-8B",
|
239 |
"s2": false,
|
240 |
"s2_max_split_size": 336,
|
241 |
"s2_scales": "336,672,1008",
|
|
|
246 |
"tune_vision_tower": true,
|
247 |
"vision_resolution": -1,
|
248 |
"vision_tower_cfg": {
|
249 |
+
"_name_or_path": "./checkpoints/SpatialRGPT-VILA1.5-8B/vision_tower",
|
250 |
"add_cross_attention": false,
|
251 |
"architectures": [
|
252 |
"SiglipVisionModel"
|