qwen_weight

Browse files

Files changed (10) hide show

ppllava_qwen7b_llavahound300k/config.json +226 -0
ppllava_qwen7b_llavahound300k/generation_config.json +7 -0
ppllava_qwen7b_llavahound300k/model-00001-of-00004.safetensors +3 -0
ppllava_qwen7b_llavahound300k/model-00002-of-00004.safetensors +3 -0
ppllava_qwen7b_llavahound300k/model-00003-of-00004.safetensors +3 -0
ppllava_qwen7b_llavahound300k/model-00004-of-00004.safetensors +3 -0
ppllava_qwen7b_llavahound300k/model.safetensors.index.json +0 -0
ppllava_qwen7b_llavahound300k/runs/Oct23_12-32-41_bore-prod-ruyangliu-20241022110203-worker-0/events.out.tfevents.1729657992.bore-prod-ruyangliu-20241022110203-worker-0.771139.0 +3 -0
ppllava_qwen7b_llavahound300k/trainer_state.json +175 -0
ppllava_qwen7b_llavahound300k/training_args.bin +3 -0

ppllava_qwen7b_llavahound300k/config.json ADDED Viewed

	@@ -0,0 +1,226 @@

+{
+  "_name_or_path": "/raid/raushan/si-7b",
+  "architectures": [
+    "LlavaInterleaveForConditionalGeneration"
+  ],
+  "btadapter": false,
+  "btadapter_depth": 4,
+  "clip_post_pretrain": null,
+  "clip_weight": "google/siglip-so400m-patch14-384",
+  "frame_shape": [
+    27,
+    27
+  ],
+  "hidden_size": 3584,
+  "ignore_index": -100,
+  "image_grid_pinpoints": [
+    [
+      384,
+      384
+    ],
+    [
+      384,
+      768
+    ],
+    [
+      384,
+      1152
+    ],
+    [
+      384,
+      1536
+    ],
+    [
+      384,
+      1920
+    ],
+    [
+      384,
+      2304
+    ],
+    [
+      768,
+      384
+    ],
+    [
+      768,
+      768
+    ],
+    [
+      768,
+      1152
+    ],
+    [
+      768,
+      1536
+    ],
+    [
+      768,
+      1920
+    ],
+    [
+      768,
+      2304
+    ],
+    [
+      1152,
+      384
+    ],
+    [
+      1152,
+      768
+    ],
+    [
+      1152,
+      1152
+    ],
+    [
+      1152,
+      1536
+    ],
+    [
+      1152,
+      1920
+    ],
+    [
+      1152,
+      2304
+    ],
+    [
+      1536,
+      384
+    ],
+    [
+      1536,
+      768
+    ],
+    [
+      1536,
+      1152
+    ],
+    [
+      1536,
+      1536
+    ],
+    [
+      1536,
+      1920
+    ],
+    [
+      1536,
+      2304
+    ],
+    [
+      1920,
+      384
+    ],
+    [
+      1920,
+      768
+    ],
+    [
+      1920,
+      1152
+    ],
+    [
+      1920,
+      1536
+    ],
+    [
+      1920,
+      1920
+    ],
+    [
+      1920,
+      2304
+    ],
+    [
+      2304,
+      384
+    ],
+    [
+      2304,
+      768
+    ],
+    [
+      2304,
+      1152
+    ],
+    [
+      2304,
+      1536
+    ],
+    [
+      2304,
+      1920
+    ],
+    [
+      2304,
+      2304
+    ]
+  ],
+  "image_pooling_kernel": [
+    1,
+    3,
+    3
+  ],
+  "image_pooling_stride": [
+    1,
+    3,
+    3
+  ],
+  "image_token_index": 151646,
+  "long_clip": true,
+  "max_T": 64,
+  "model_type": "llava_onevision",
+  "pad_token_id": 151643,
+  "pooling": "clipST_3d",
+  "pooling_kernel": [
+    2,
+    3,
+    3
+  ],
+  "pooling_stride": [
+    2,
+    3,
+    3
+  ],
+  "pooling_temp": 0.01,
+  "projector_hidden_act": "gelu",
+  "qwen": true,
+  "text_config": {
+    "_name_or_path": "Qwen/Qwen2-7B-Instruct",
+    "architectures": [
+      "Qwen2ForCausalLM"
+    ],
+    "bos_token_id": 151643,
+    "eos_token_id": 151645,
+    "hidden_size": 3584,
+    "intermediate_size": 18944,
+    "model_type": "qwen2",
+    "num_attention_heads": 28,
+    "num_hidden_layers": 28,
+    "num_key_value_heads": 4,
+    "rope_theta": 1000000.0,
+    "torch_dtype": "bfloat16",
+    "vocab_size": 152128
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.45.2",
+  "use_image_newline_parameter": true,
+  "video_token_index": 151647,
+  "vision_aspect_ratio": "anyres_max_9",
+  "vision_config": {
+    "hidden_size": 1152,
+    "image_size": 384,
+    "intermediate_size": 4304,
+    "model_type": "siglip_vision_model",
+    "num_attention_heads": 16,
+    "num_hidden_layers": 26,
+    "patch_size": 14,
+    "vision_use_head": false
+  },
+  "vision_feature_layer": -1,
+  "vision_feature_select_strategy": "full"
+}

ppllava_qwen7b_llavahound300k/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "pad_token_id": 151643,
+  "transformers_version": "4.45.2"
+}

ppllava_qwen7b_llavahound300k/model-00001-of-00004.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da333b38bc421c35f4ce1b93a2327a4ab5ae25c7058afb04cf2d8bfc952c28e5
+size 4909741188

ppllava_qwen7b_llavahound300k/model-00002-of-00004.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d78689f8c0c153346fc4f0ac48e1b529672974069da747abf796570c8c35941
+size 4991497768

ppllava_qwen7b_llavahound300k/model-00003-of-00004.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fe2b68e0bb66798772203dd2350b6d6a9940b91649a1f5d507f19150e417ff69
+size 4932752872

ppllava_qwen7b_llavahound300k/model-00004-of-00004.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf32964185a428b622027f189af55280767ec4d30281df2efbb3894d5c18ba40
+size 2158500536

ppllava_qwen7b_llavahound300k/model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

ppllava_qwen7b_llavahound300k/runs/Oct23_12-32-41_bore-prod-ruyangliu-20241022110203-worker-0/events.out.tfevents.1729657992.bore-prod-ruyangliu-20241022110203-worker-0.771139.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7828017dc98626eb87ab305ef43f0d6525d106dd64ece11f13fdcf5b33fae57f
+size 12109

ppllava_qwen7b_llavahound300k/trainer_state.json ADDED Viewed

	@@ -0,0 +1,175 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9997490589711417,
+  "eval_steps": 500,
+  "global_step": 996,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.050188205771643665,
+      "grad_norm": 2.3202156020973708,
+      "learning_rate": 9.989427142584392e-06,
+      "loss": 1.5003,
+      "step": 50
+    },
+    {
+      "epoch": 0.10037641154328733,
+      "grad_norm": 2.314291595101664,
+      "learning_rate": 9.870995413367397e-06,
+      "loss": 1.3883,
+      "step": 100
+    },
+    {
+      "epoch": 0.15056461731493098,
+      "grad_norm": 2.314931158603531,
+      "learning_rate": 9.624050979896533e-06,
+      "loss": 1.3754,
+      "step": 150
+    },
+    {
+      "epoch": 0.20075282308657466,
+      "grad_norm": 2.152003211136021,
+      "learning_rate": 9.255109039631998e-06,
+      "loss": 1.3628,
+      "step": 200
+    },
+    {
+      "epoch": 0.25094102885821834,
+      "grad_norm": 2.1047973694759157,
+      "learning_rate": 8.773903481118611e-06,
+      "loss": 1.3543,
+      "step": 250
+    },
+    {
+      "epoch": 0.30112923462986196,
+      "grad_norm": 2.1025984809300318,
+      "learning_rate": 8.193130072341872e-06,
+      "loss": 1.3515,
+      "step": 300
+    },
+    {
+      "epoch": 0.35131744040150564,
+      "grad_norm": 2.2036136599303076,
+      "learning_rate": 7.528111505069428e-06,
+      "loss": 1.3419,
+      "step": 350
+    },
+    {
+      "epoch": 0.4015056461731493,
+      "grad_norm": 2.1361038466937563,
+      "learning_rate": 6.796393132397829e-06,
+      "loss": 1.3361,
+      "step": 400
+    },
+    {
+      "epoch": 0.451693851944793,
+      "grad_norm": 2.1253080705715175,
+      "learning_rate": 6.0172800652631706e-06,
+      "loss": 1.3336,
+      "step": 450
+    },
+    {
+      "epoch": 0.5018820577164367,
+      "grad_norm": 2.094902290851612,
+      "learning_rate": 5.211327840815459e-06,
+      "loss": 1.321,
+      "step": 500
+    },
+    {
+      "epoch": 0.5520702634880803,
+      "grad_norm": 2.086008706466924,
+      "learning_rate": 4.399800100481858e-06,
+      "loss": 1.3173,
+      "step": 550
+    },
+    {
+      "epoch": 0.6022584692597239,
+      "grad_norm": 2.084376290248031,
+      "learning_rate": 3.6041075859356383e-06,
+      "loss": 1.3044,
+      "step": 600
+    },
+    {
+      "epoch": 0.6524466750313677,
+      "grad_norm": 2.1873617946017525,
+      "learning_rate": 2.845243254082134e-06,
+      "loss": 1.3029,
+      "step": 650
+    },
+    {
+      "epoch": 0.7026348808030113,
+      "grad_norm": 2.1539300794808733,
+      "learning_rate": 2.1432284145659104e-06,
+      "loss": 1.2977,
+      "step": 700
+    },
+    {
+      "epoch": 0.7528230865746549,
+      "grad_norm": 2.100942527337154,
+      "learning_rate": 1.5165845024934366e-06,
+      "loss": 1.3072,
+      "step": 750
+    },
+    {
+      "epoch": 0.8030112923462986,
+      "grad_norm": 2.2004509246977477,
+      "learning_rate": 9.81844422725109e-07,
+      "loss": 1.3014,
+      "step": 800
+    },
+    {
+      "epoch": 0.8531994981179423,
+      "grad_norm": 2.148051612184209,
+      "learning_rate": 5.531163580638483e-07,
+      "loss": 1.2948,
+      "step": 850
+    },
+    {
+      "epoch": 0.903387703889586,
+      "grad_norm": 2.0857628452747248,
+      "learning_rate": 2.417115494991107e-07,
+      "loss": 1.2925,
+      "step": 900
+    },
+    {
+      "epoch": 0.9535759096612296,
+      "grad_norm": 2.1477939821041936,
+      "learning_rate": 5.584586887435739e-08,
+      "loss": 1.2951,
+      "step": 950
+    },
+    {
+      "epoch": 0.9997490589711417,
+      "step": 996,
+      "total_flos": 3245135674474496.0,
+      "train_loss": 1.3331998112690018,
+      "train_runtime": 34717.8895,
+      "train_samples_per_second": 7.345,
+      "train_steps_per_second": 0.029
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 996,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 420,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3245135674474496.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

ppllava_qwen7b_llavahound300k/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6f7cc6ad8ff8f49f3b95141d4510548fa0bfdec4021e61ae08fc3cf7bfb8ae0c
+size 7352