a8cheng committed on
Commit
64df790
1 Parent(s): 17b6860

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +6 -6
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "./checkpoints/vila-siglip-llama3-8b-vila-v1.5-srgpt-sft-fixed",
3
  "architectures": [
4
  "LlavaLlamaModel"
5
  ],
@@ -11,7 +11,7 @@
11
  "image_aspect_ratio": "resize",
12
  "interpolate_mode": "linear",
13
  "llm_cfg": {
14
- "_name_or_path": "./checkpoints/vila-siglip-llama3-8b-vila-v1.5-srgpt-sft-fixed/llm",
15
  "add_cross_attention": false,
16
  "architectures": [
17
  "LlamaForCausalLM"
@@ -96,7 +96,7 @@
96
  },
97
  "mm_hidden_size": 1152,
98
  "mm_projector_cfg": {
99
- "_name_or_path": "./checkpoints/vila-siglip-llama3-8b-vila-v1.5-srgpt-sft-fixed/mm_projector",
100
  "add_cross_attention": false,
101
  "architectures": [
102
  "MultimodalProjector"
@@ -170,7 +170,7 @@
170
  "model_type": "llava_llama",
171
  "num_video_frames": 8,
172
  "region_extractor_cfg": {
173
- "_name_or_path": "./checkpoints/vila-siglip-llama3-8b-vila-v1.5-srgpt-sft-fixed/region_extractor",
174
  "add_cross_attention": false,
175
  "architectures": [
176
  "RegionExtractor"
@@ -235,7 +235,7 @@
235
  "typical_p": 1.0,
236
  "use_bfloat16": false
237
  },
238
- "resume_path": "./checkpoints/vila-siglip-llama3-8b-vila-v1.5-srgpt-sft-fixed",
239
  "s2": false,
240
  "s2_max_split_size": 336,
241
  "s2_scales": "336,672,1008",
@@ -246,7 +246,7 @@
246
  "tune_vision_tower": true,
247
  "vision_resolution": -1,
248
  "vision_tower_cfg": {
249
- "_name_or_path": "./checkpoints/vila-siglip-llama3-8b-vila-v1.5-srgpt-sft-fixed/vision_tower",
250
  "add_cross_attention": false,
251
  "architectures": [
252
  "SiglipVisionModel"
 
1
  {
2
+ "_name_or_path": "./checkpoints/SpatialRGPT-VILA1.5-8B",
3
  "architectures": [
4
  "LlavaLlamaModel"
5
  ],
 
11
  "image_aspect_ratio": "resize",
12
  "interpolate_mode": "linear",
13
  "llm_cfg": {
14
+ "_name_or_path": "./checkpoints/SpatialRGPT-VILA1.5-8B/llm",
15
  "add_cross_attention": false,
16
  "architectures": [
17
  "LlamaForCausalLM"
 
96
  },
97
  "mm_hidden_size": 1152,
98
  "mm_projector_cfg": {
99
+ "_name_or_path": "./checkpoints/SpatialRGPT-VILA1.5-8B/mm_projector",
100
  "add_cross_attention": false,
101
  "architectures": [
102
  "MultimodalProjector"
 
170
  "model_type": "llava_llama",
171
  "num_video_frames": 8,
172
  "region_extractor_cfg": {
173
+ "_name_or_path": "./checkpoints/SpatialRGPT-VILA1.5-8B/region_extractor",
174
  "add_cross_attention": false,
175
  "architectures": [
176
  "RegionExtractor"
 
235
  "typical_p": 1.0,
236
  "use_bfloat16": false
237
  },
238
+ "resume_path": "./checkpoints/SpatialRGPT-VILA1.5-8B",
239
  "s2": false,
240
  "s2_max_split_size": 336,
241
  "s2_scales": "336,672,1008",
 
246
  "tune_vision_tower": true,
247
  "vision_resolution": -1,
248
  "vision_tower_cfg": {
249
+ "_name_or_path": "./checkpoints/SpatialRGPT-VILA1.5-8B/vision_tower",
250
  "add_cross_attention": false,
251
  "architectures": [
252
  "SiglipVisionModel"