salma-remyx committed
Commit c9d886b
Parent: 6a65a93

Training in progress, epoch 3

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the complete set of changes.
Files changed (50)
  1. adapter_config.json +6 -6
  2. adapter_model.safetensors +1 -1
  3. merged/added_tokens.json +16 -0
  4. merged/chat_template.json +3 -0
  5. merged/config.json +47 -0
  6. merged/generation_config.json +13 -0
  7. merged/merges.txt +0 -0
  8. merged/model-00001-of-00018.safetensors +3 -0
  9. merged/model-00002-of-00018.safetensors +3 -0
  10. merged/model-00003-of-00018.safetensors +3 -0
  11. merged/model-00004-of-00018.safetensors +3 -0
  12. merged/model-00005-of-00018.safetensors +3 -0
  13. merged/model-00006-of-00018.safetensors +3 -0
  14. merged/model-00007-of-00018.safetensors +3 -0
  15. merged/model-00008-of-00018.safetensors +3 -0
  16. merged/model-00009-of-00018.safetensors +3 -0
  17. merged/model-00010-of-00018.safetensors +3 -0
  18. merged/model-00011-of-00018.safetensors +3 -0
  19. merged/model-00012-of-00018.safetensors +3 -0
  20. merged/model-00013-of-00018.safetensors +3 -0
  21. merged/model-00014-of-00018.safetensors +3 -0
  22. merged/model-00015-of-00018.safetensors +3 -0
  23. merged/model-00016-of-00018.safetensors +3 -0
  24. merged/model-00017-of-00018.safetensors +3 -0
  25. merged/model-00018-of-00018.safetensors +3 -0
  26. merged/model.safetensors.index.json +737 -0
  27. merged/preprocessor_config.json +29 -0
  28. merged/special_tokens_map.json +31 -0
  29. merged/tokenizer.json +3 -0
  30. merged/tokenizer_config.json +144 -0
  31. merged/vocab.json +0 -0
  32. spaceqwen2-7b-instruct/checkpoint-1446/README.md +202 -0
  33. spaceqwen2-7b-instruct/checkpoint-1446/adapter_config.json +36 -0
  34. spaceqwen2-7b-instruct/checkpoint-1446/adapter_model.safetensors +3 -0
  35. spaceqwen2-7b-instruct/checkpoint-1446/added_tokens.json +16 -0
  36. spaceqwen2-7b-instruct/checkpoint-1446/merges.txt +0 -0
  37. spaceqwen2-7b-instruct/checkpoint-1446/optimizer.pt +3 -0
  38. spaceqwen2-7b-instruct/checkpoint-1446/rng_state.pth +3 -0
  39. spaceqwen2-7b-instruct/checkpoint-1446/scheduler.pt +3 -0
  40. spaceqwen2-7b-instruct/checkpoint-1446/special_tokens_map.json +31 -0
  41. spaceqwen2-7b-instruct/checkpoint-1446/tokenizer.json +3 -0
  42. spaceqwen2-7b-instruct/checkpoint-1446/tokenizer_config.json +143 -0
  43. spaceqwen2-7b-instruct/checkpoint-1446/trainer_state.json +2056 -0
  44. spaceqwen2-7b-instruct/checkpoint-1446/training_args.bin +3 -0
  45. spaceqwen2-7b-instruct/checkpoint-1446/vocab.json +0 -0
  46. spaceqwen2-7b-instruct/checkpoint-2169/README.md +202 -0
  47. spaceqwen2-7b-instruct/checkpoint-2169/adapter_config.json +36 -0
  48. spaceqwen2-7b-instruct/checkpoint-2169/adapter_model.safetensors +3 -0
  49. spaceqwen2-7b-instruct/checkpoint-2169/added_tokens.json +16 -0
  50. spaceqwen2-7b-instruct/checkpoint-2169/merges.txt +0 -0
adapter_config.json CHANGED
@@ -20,15 +20,15 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "c_fc",
-    "out_proj",
-    "c_attn",
-    "v_proj",
     "w1",
+    "out_proj",
+    "w2",
     "attn.c_proj",
+    "c_attn",
+    "c_fc",
     "q_proj",
-    "in_proj",
-    "w2"
+    "v_proj",
+    "in_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ea4fcb7262d44ebd7acc0b0623fe2bcb94f40bcb5b5ecbdc8859b3c8f1b4951
+oid sha256:504cedc11387823686ab485bc264ffdd976932c586827eec9bb16bf13a6a0a71
 size 161495976
merged/added_tokens.json ADDED
@@ -0,0 +1,16 @@
+{
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}
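These sixteen entries pin Qwen2-VL's special tokens (chat markers, vision delimiters, grounding tokens) to fixed ids at the top of the vocabulary. A quick sanity-check sketch, assuming the merged/ directory is available locally:

```python
# Sketch: confirm a tokenizer loaded from merged/ resolves the added
# tokens to the ids recorded above. "merged" is assumed to be a local path.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("merged")
for token, expected_id in [("<|im_start|>", 151644),
                           ("<|image_pad|>", 151655),
                           ("<|vision_start|>", 151652)]:
    assert tokenizer.convert_tokens_to_ids(token) == expected_id, token
```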
merged/chat_template.json ADDED
@@ -0,0 +1,3 @@
+{
+  "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+}
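The template injects a default system prompt when none is supplied, renders each image as <|vision_start|><|image_pad|><|vision_end|> (with an optional "Picture N:" label when add_vision_id is set), handles video the same way via <|video_pad|>, and appends the assistant header when add_generation_prompt is true. A rendering sketch, assuming merged/ is a local path and an illustrative question:

```python
# Sketch: render the chat template above for one image+text user turn.
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("merged")  # assumed local path
messages = [{
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "How far apart are the two boxes?"},
    ],
}]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
# Expected output (abridged):
# <|im_start|>system\nYou are a helpful assistant.<|im_end|>\n
# <|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>How far ...<|im_end|>\n
# <|im_start|>assistant\n
```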
merged/config.json ADDED
@@ -0,0 +1,47 @@
+{
+  "_name_or_path": "Qwen/Qwen2-VL-7B-Instruct",
+  "architectures": [
+    "Qwen2VLForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 3584,
+  "image_token_id": 151655,
+  "initializer_range": 0.02,
+  "intermediate_size": 18944,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen2_vl",
+  "num_attention_heads": 28,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 4,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": {
+    "mrope_section": [
+      16,
+      24,
+      24
+    ],
+    "rope_type": "default",
+    "type": "default"
+  },
+  "rope_theta": 1000000.0,
+  "sliding_window": 32768,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.0",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "video_token_id": 151656,
+  "vision_config": {
+    "in_chans": 3,
+    "model_type": "qwen2_vl",
+    "spatial_patch_size": 14
+  },
+  "vision_end_token_id": 151653,
+  "vision_start_token_id": 151652,
+  "vision_token_id": 151654,
+  "vocab_size": 152064
+}
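The merged checkpoint is a full Qwen2VLForConditionalGeneration derived from Qwen/Qwen2-VL-7B-Instruct, saved in float32 — consistent with the ~33 GB total (roughly 8.3B parameters at 4 bytes each) recorded in the shard index further down. A minimal loading sketch; the bfloat16 downcast is an optional assumption, not something this commit does:

```python
# Sketch: load the merged full-precision checkpoint. Casting to bfloat16
# at load time roughly halves the ~33 GB float32 footprint.
import torch
from transformers import Qwen2VLForConditionalGeneration

model = Qwen2VLForConditionalGeneration.from_pretrained(
    "merged",                    # assumed local path
    torch_dtype=torch.bfloat16,  # optional downcast; config says float32
    device_map="auto",
)
```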
merged/generation_config.json ADDED
@@ -0,0 +1,13 @@
+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "temperature": 0.01,
+  "top_k": 1,
+  "top_p": 0.001,
+  "transformers_version": "4.46.0"
+}
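Although do_sample is true, these defaults are effectively greedy: top_k of 1 keeps only the argmax token, and temperature 0.01 with top_p 0.001 sharpens the distribution further. They can be inspected, and overridden per call, without editing the file — a sketch assuming merged/ is local:

```python
# Sketch: the shipped decoding defaults collapse sampling to the argmax.
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("merged")  # assumed local path
print(gen_config.do_sample, gen_config.top_k)  # True 1 -> argmax decoding

# Looser sampling can be passed per generate() call instead, e.g.:
# model.generate(**inputs, max_new_tokens=128, temperature=0.7, top_p=0.9)
```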
merged/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
merged/model-00001-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4635b62ada7167be53b8e4872a3e53910d8b581d6d78045b6efb6c702c41f2e6
+size 1993493408
merged/model-00002-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb2626c30c4b1770e38cf764ec0e2244dd839a5a123ba43e385d4aba83b51cab
+size 2179989640
merged/model-00003-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:064bd9ff2142a79cee1f2e60b5336fa90c09e4644e22f034cc8382584f2e5f0d
+size 1759275664
merged/model-00004-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01a9120d630e818ea828f2b641f37b8a55e767abb5595a58d932d1fe48be9210
+size 1864465064
merged/model-00005-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d89ff973c17fa16a7c8625885143765b6b0597df96f910f1acc2e659a25a92ad
+size 1864465064
merged/model-00006-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9db40d30652427c0b68efe82623647a045440dbfbf4f29257017d44ae0cc199d
+size 1864465064
merged/model-00007-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9ef2e38dc0b7c50b96c54a76e4946f7bdbb79b0d682425cae260afda9165b4b
+size 1864465064
merged/model-00008-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff66e6c61f9633961ea1e44d8ac433c54dfce6c41175727967221e38ec65457a
+size 1864465064
merged/model-00009-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:508fe3889485ededb70f4e76e933de30c24d59c83f169bd1d8640b257b7f587e
+size 1864465088
merged/model-00010-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be9ac257504997bf910697cee07e68c7ee895226b8f48f402879202a855273fa
+size 1864465088
merged/model-00011-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f9830ba04b24c9bc9c4d39739b79bbd307eb70fd5fcbc7802f182e950a4d3ad
+size 1864465088
merged/model-00012-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d525d2d225a90c2d74f6f80117939cd245392d4c4bcf2c96c9291ec763e49e3f
+size 1864465088
merged/model-00013-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2321b5fc38ea76503775fcdae7ab0c5e16d44f02775f41bb03d6ada7feac1473
+size 1864465088
merged/model-00014-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cba3a13e14d4d77fc8152bcfcf5962a2cde5acba7581fa5cf69074d8b55fddc2
+size 1864465088
merged/model-00015-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7442245858da36c1c4da01d544c00ea01a482b5e4bd169cf36c1978d22c65021
+size 1864465088
merged/model-00016-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5983e281c007f6c074c7302bb7fbdd35b63298abbd32f0d40b83e5899e8f8b98
+size 1864465088
merged/model-00017-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9622a9474f974a77ff85f2fd5c8ecd4eb85815a4644af58546baf2124e1b3eab
+size 2179989632
merged/model-00018-of-00018.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:344b3fb505917dcfd24a03c09d3be7b63f71c97f1fabc1c3cc9f8dac48a7ae82
+size 814787240
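Each shard above is stored as a three-line Git LFS pointer — spec version, sha256 oid, and byte size — while the actual tensor data lives in LFS storage. A sketch for verifying a downloaded shard against its pointer, assuming the files are on disk under merged/:

```python
# Sketch: check a downloaded shard against the sha256 oid in its LFS pointer.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# oid copied from the model-00018-of-00018.safetensors pointer above
assert sha256_of("merged/model-00018-of-00018.safetensors") == \
    "344b3fb505917dcfd24a03c09d3be7b63f71c97f1fabc1c3cc9f8dac48a7ae82"
```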
merged/model.safetensors.index.json ADDED
@@ -0,0 +1,737 @@
+{
+  "metadata": {
+    "total_size": 33165502464
+  },
+  "weight_map": {
+    "lm_head.weight": "model-00017-of-00018.safetensors",
+    "model.embed_tokens.weight": "model-00002-of-00018.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00003-of-00018.safetensors",
+    "model.layers.0.mlp.down_proj.weight": "model-00003-of-00018.safetensors",
+    "model.layers.0.mlp.gate_proj.weight": "model-00003-of-00018.safetensors",
+    "model.layers.0.mlp.up_proj.weight": "model-00003-of-00018.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00003-of-00018.safetensors",
+    "model.layers.0.self_attn.k_proj.bias": "model-00003-of-00018.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00003-of-00018.safetensors",
+    "model.layers.0.self_attn.o_proj.weight": "model-00003-of-00018.safetensors",
+    "model.layers.0.self_attn.q_proj.bias": "model-00003-of-00018.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00003-of-00018.safetensors",
+    "model.layers.0.self_attn.v_proj.bias": "model-00003-of-00018.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00003-of-00018.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00004-of-00018.safetensors",
+    "model.layers.1.mlp.down_proj.weight": "model-00004-of-00018.safetensors",
+    "model.layers.1.mlp.gate_proj.weight": "model-00004-of-00018.safetensors",
+    "model.layers.1.mlp.up_proj.weight": "model-00004-of-00018.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00004-of-00018.safetensors",
+    "model.layers.1.self_attn.k_proj.bias": "model-00003-of-00018.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00003-of-00018.safetensors",
+    "model.layers.1.self_attn.o_proj.weight": "model-00003-of-00018.safetensors",
+    "model.layers.1.self_attn.q_proj.bias": "model-00003-of-00018.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00003-of-00018.safetensors",
+    "model.layers.1.self_attn.v_proj.bias": "model-00003-of-00018.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00003-of-00018.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00008-of-00018.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00008-of-00018.safetensors",
+    "model.layers.10.mlp.gate_proj.weight": "model-00008-of-00018.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00008-of-00018.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00008-of-00018.safetensors",
+    "model.layers.10.self_attn.k_proj.bias": "model-00008-of-00018.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00008-of-00018.safetensors",
+    "model.layers.10.self_attn.o_proj.weight": "model-00008-of-00018.safetensors",
+    "model.layers.10.self_attn.q_proj.bias": "model-00008-of-00018.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00008-of-00018.safetensors",
+    "model.layers.10.self_attn.v_proj.bias": "model-00008-of-00018.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00008-of-00018.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00009-of-00018.safetensors",
+    "model.layers.11.mlp.down_proj.weight": "model-00009-of-00018.safetensors",
+    "model.layers.11.mlp.gate_proj.weight": "model-00009-of-00018.safetensors",
+    "model.layers.11.mlp.up_proj.weight": "model-00009-of-00018.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00009-of-00018.safetensors",
+    "model.layers.11.self_attn.k_proj.bias": "model-00008-of-00018.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00008-of-00018.safetensors",
+    "model.layers.11.self_attn.o_proj.weight": "model-00008-of-00018.safetensors",
+    "model.layers.11.self_attn.q_proj.bias": "model-00008-of-00018.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00008-of-00018.safetensors",
+    "model.layers.11.self_attn.v_proj.bias": "model-00008-of-00018.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00008-of-00018.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00009-of-00018.safetensors",
+    "model.layers.12.mlp.down_proj.weight": "model-00009-of-00018.safetensors",
+    "model.layers.12.mlp.gate_proj.weight": "model-00009-of-00018.safetensors",
+    "model.layers.12.mlp.up_proj.weight": "model-00009-of-00018.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00009-of-00018.safetensors",
+    "model.layers.12.self_attn.k_proj.bias": "model-00009-of-00018.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00009-of-00018.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00009-of-00018.safetensors",
+    "model.layers.12.self_attn.q_proj.bias": "model-00009-of-00018.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00009-of-00018.safetensors",
+    "model.layers.12.self_attn.v_proj.bias": "model-00009-of-00018.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00009-of-00018.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00010-of-00018.safetensors",
+    "model.layers.13.mlp.down_proj.weight": "model-00010-of-00018.safetensors",
+    "model.layers.13.mlp.gate_proj.weight": "model-00010-of-00018.safetensors",
+    "model.layers.13.mlp.up_proj.weight": "model-00010-of-00018.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00010-of-00018.safetensors",
+    "model.layers.13.self_attn.k_proj.bias": "model-00009-of-00018.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00009-of-00018.safetensors",
+    "model.layers.13.self_attn.o_proj.weight": "model-00009-of-00018.safetensors",
+    "model.layers.13.self_attn.q_proj.bias": "model-00009-of-00018.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00009-of-00018.safetensors",
+    "model.layers.13.self_attn.v_proj.bias": "model-00009-of-00018.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00009-of-00018.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00010-of-00018.safetensors",
+    "model.layers.14.mlp.down_proj.weight": "model-00010-of-00018.safetensors",
+    "model.layers.14.mlp.gate_proj.weight": "model-00010-of-00018.safetensors",
+    "model.layers.14.mlp.up_proj.weight": "model-00010-of-00018.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00010-of-00018.safetensors",
+    "model.layers.14.self_attn.k_proj.bias": "model-00010-of-00018.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00010-of-00018.safetensors",
+    "model.layers.14.self_attn.o_proj.weight": "model-00010-of-00018.safetensors",
+    "model.layers.14.self_attn.q_proj.bias": "model-00010-of-00018.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00010-of-00018.safetensors",
+    "model.layers.14.self_attn.v_proj.bias": "model-00010-of-00018.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00010-of-00018.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00011-of-00018.safetensors",
+    "model.layers.15.mlp.down_proj.weight": "model-00011-of-00018.safetensors",
+    "model.layers.15.mlp.gate_proj.weight": "model-00011-of-00018.safetensors",
+    "model.layers.15.mlp.up_proj.weight": "model-00011-of-00018.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00011-of-00018.safetensors",
+    "model.layers.15.self_attn.k_proj.bias": "model-00010-of-00018.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00010-of-00018.safetensors",
+    "model.layers.15.self_attn.o_proj.weight": "model-00010-of-00018.safetensors",
+    "model.layers.15.self_attn.q_proj.bias": "model-00010-of-00018.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00010-of-00018.safetensors",
+    "model.layers.15.self_attn.v_proj.bias": "model-00010-of-00018.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00010-of-00018.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00011-of-00018.safetensors",
+    "model.layers.16.mlp.down_proj.weight": "model-00011-of-00018.safetensors",
+    "model.layers.16.mlp.gate_proj.weight": "model-00011-of-00018.safetensors",
+    "model.layers.16.mlp.up_proj.weight": "model-00011-of-00018.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00011-of-00018.safetensors",
+    "model.layers.16.self_attn.k_proj.bias": "model-00011-of-00018.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00011-of-00018.safetensors",
+    "model.layers.16.self_attn.o_proj.weight": "model-00011-of-00018.safetensors",
+    "model.layers.16.self_attn.q_proj.bias": "model-00011-of-00018.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00011-of-00018.safetensors",
+    "model.layers.16.self_attn.v_proj.bias": "model-00011-of-00018.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00011-of-00018.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00012-of-00018.safetensors",
+    "model.layers.17.mlp.down_proj.weight": "model-00012-of-00018.safetensors",
+    "model.layers.17.mlp.gate_proj.weight": "model-00012-of-00018.safetensors",
+    "model.layers.17.mlp.up_proj.weight": "model-00012-of-00018.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00012-of-00018.safetensors",
+    "model.layers.17.self_attn.k_proj.bias": "model-00011-of-00018.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00011-of-00018.safetensors",
+    "model.layers.17.self_attn.o_proj.weight": "model-00011-of-00018.safetensors",
+    "model.layers.17.self_attn.q_proj.bias": "model-00011-of-00018.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00011-of-00018.safetensors",
+    "model.layers.17.self_attn.v_proj.bias": "model-00011-of-00018.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00011-of-00018.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00012-of-00018.safetensors",
+    "model.layers.18.mlp.down_proj.weight": "model-00012-of-00018.safetensors",
+    "model.layers.18.mlp.gate_proj.weight": "model-00012-of-00018.safetensors",
+    "model.layers.18.mlp.up_proj.weight": "model-00012-of-00018.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00012-of-00018.safetensors",
+    "model.layers.18.self_attn.k_proj.bias": "model-00012-of-00018.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00012-of-00018.safetensors",
+    "model.layers.18.self_attn.o_proj.weight": "model-00012-of-00018.safetensors",
+    "model.layers.18.self_attn.q_proj.bias": "model-00012-of-00018.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00012-of-00018.safetensors",
+    "model.layers.18.self_attn.v_proj.bias": "model-00012-of-00018.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00012-of-00018.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00013-of-00018.safetensors",
+    "model.layers.19.mlp.down_proj.weight": "model-00013-of-00018.safetensors",
+    "model.layers.19.mlp.gate_proj.weight": "model-00013-of-00018.safetensors",
+    "model.layers.19.mlp.up_proj.weight": "model-00013-of-00018.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00013-of-00018.safetensors",
+    "model.layers.19.self_attn.k_proj.bias": "model-00012-of-00018.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00012-of-00018.safetensors",
+    "model.layers.19.self_attn.o_proj.weight": "model-00012-of-00018.safetensors",
+    "model.layers.19.self_attn.q_proj.bias": "model-00012-of-00018.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00012-of-00018.safetensors",
+    "model.layers.19.self_attn.v_proj.bias": "model-00012-of-00018.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00012-of-00018.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00004-of-00018.safetensors",
+    "model.layers.2.mlp.down_proj.weight": "model-00004-of-00018.safetensors",
+    "model.layers.2.mlp.gate_proj.weight": "model-00004-of-00018.safetensors",
+    "model.layers.2.mlp.up_proj.weight": "model-00004-of-00018.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00004-of-00018.safetensors",
+    "model.layers.2.self_attn.k_proj.bias": "model-00004-of-00018.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00004-of-00018.safetensors",
+    "model.layers.2.self_attn.o_proj.weight": "model-00004-of-00018.safetensors",
+    "model.layers.2.self_attn.q_proj.bias": "model-00004-of-00018.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00004-of-00018.safetensors",
+    "model.layers.2.self_attn.v_proj.bias": "model-00004-of-00018.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00004-of-00018.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00013-of-00018.safetensors",
+    "model.layers.20.mlp.down_proj.weight": "model-00013-of-00018.safetensors",
+    "model.layers.20.mlp.gate_proj.weight": "model-00013-of-00018.safetensors",
+    "model.layers.20.mlp.up_proj.weight": "model-00013-of-00018.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00013-of-00018.safetensors",
+    "model.layers.20.self_attn.k_proj.bias": "model-00013-of-00018.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00013-of-00018.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00013-of-00018.safetensors",
+    "model.layers.20.self_attn.q_proj.bias": "model-00013-of-00018.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00013-of-00018.safetensors",
+    "model.layers.20.self_attn.v_proj.bias": "model-00013-of-00018.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00013-of-00018.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00014-of-00018.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00014-of-00018.safetensors",
+    "model.layers.21.mlp.gate_proj.weight": "model-00014-of-00018.safetensors",
+    "model.layers.21.mlp.up_proj.weight": "model-00014-of-00018.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00014-of-00018.safetensors",
+    "model.layers.21.self_attn.k_proj.bias": "model-00013-of-00018.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00013-of-00018.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00013-of-00018.safetensors",
+    "model.layers.21.self_attn.q_proj.bias": "model-00013-of-00018.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00013-of-00018.safetensors",
+    "model.layers.21.self_attn.v_proj.bias": "model-00013-of-00018.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00013-of-00018.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00014-of-00018.safetensors",
+    "model.layers.22.mlp.down_proj.weight": "model-00014-of-00018.safetensors",
+    "model.layers.22.mlp.gate_proj.weight": "model-00014-of-00018.safetensors",
+    "model.layers.22.mlp.up_proj.weight": "model-00014-of-00018.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00014-of-00018.safetensors",
+    "model.layers.22.self_attn.k_proj.bias": "model-00014-of-00018.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00014-of-00018.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00014-of-00018.safetensors",
+    "model.layers.22.self_attn.q_proj.bias": "model-00014-of-00018.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00014-of-00018.safetensors",
+    "model.layers.22.self_attn.v_proj.bias": "model-00014-of-00018.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00014-of-00018.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00015-of-00018.safetensors",
+    "model.layers.23.mlp.down_proj.weight": "model-00015-of-00018.safetensors",
+    "model.layers.23.mlp.gate_proj.weight": "model-00015-of-00018.safetensors",
+    "model.layers.23.mlp.up_proj.weight": "model-00015-of-00018.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00015-of-00018.safetensors",
+    "model.layers.23.self_attn.k_proj.bias": "model-00014-of-00018.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00014-of-00018.safetensors",
+    "model.layers.23.self_attn.o_proj.weight": "model-00014-of-00018.safetensors",
+    "model.layers.23.self_attn.q_proj.bias": "model-00014-of-00018.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00014-of-00018.safetensors",
+    "model.layers.23.self_attn.v_proj.bias": "model-00014-of-00018.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00014-of-00018.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00015-of-00018.safetensors",
+    "model.layers.24.mlp.down_proj.weight": "model-00015-of-00018.safetensors",
+    "model.layers.24.mlp.gate_proj.weight": "model-00015-of-00018.safetensors",
+    "model.layers.24.mlp.up_proj.weight": "model-00015-of-00018.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00015-of-00018.safetensors",
+    "model.layers.24.self_attn.k_proj.bias": "model-00015-of-00018.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00015-of-00018.safetensors",
+    "model.layers.24.self_attn.o_proj.weight": "model-00015-of-00018.safetensors",
+    "model.layers.24.self_attn.q_proj.bias": "model-00015-of-00018.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00015-of-00018.safetensors",
+    "model.layers.24.self_attn.v_proj.bias": "model-00015-of-00018.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00015-of-00018.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00016-of-00018.safetensors",
+    "model.layers.25.mlp.down_proj.weight": "model-00016-of-00018.safetensors",
+    "model.layers.25.mlp.gate_proj.weight": "model-00016-of-00018.safetensors",
+    "model.layers.25.mlp.up_proj.weight": "model-00016-of-00018.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00016-of-00018.safetensors",
+    "model.layers.25.self_attn.k_proj.bias": "model-00015-of-00018.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00015-of-00018.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00015-of-00018.safetensors",
+    "model.layers.25.self_attn.q_proj.bias": "model-00015-of-00018.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00015-of-00018.safetensors",
+    "model.layers.25.self_attn.v_proj.bias": "model-00015-of-00018.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00015-of-00018.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00016-of-00018.safetensors",
+    "model.layers.26.mlp.down_proj.weight": "model-00016-of-00018.safetensors",
+    "model.layers.26.mlp.gate_proj.weight": "model-00016-of-00018.safetensors",
+    "model.layers.26.mlp.up_proj.weight": "model-00016-of-00018.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00016-of-00018.safetensors",
+    "model.layers.26.self_attn.k_proj.bias": "model-00016-of-00018.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00016-of-00018.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00016-of-00018.safetensors",
+    "model.layers.26.self_attn.q_proj.bias": "model-00016-of-00018.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00016-of-00018.safetensors",
+    "model.layers.26.self_attn.v_proj.bias": "model-00016-of-00018.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00016-of-00018.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00018-of-00018.safetensors",
+    "model.layers.27.mlp.down_proj.weight": "model-00018-of-00018.safetensors",
+    "model.layers.27.mlp.gate_proj.weight": "model-00018-of-00018.safetensors",
+    "model.layers.27.mlp.up_proj.weight": "model-00018-of-00018.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00018-of-00018.safetensors",
+    "model.layers.27.self_attn.k_proj.bias": "model-00016-of-00018.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00016-of-00018.safetensors",
+    "model.layers.27.self_attn.o_proj.weight": "model-00016-of-00018.safetensors",
+    "model.layers.27.self_attn.q_proj.bias": "model-00016-of-00018.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00016-of-00018.safetensors",
+    "model.layers.27.self_attn.v_proj.bias": "model-00016-of-00018.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00016-of-00018.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00005-of-00018.safetensors",
+    "model.layers.3.mlp.down_proj.weight": "model-00005-of-00018.safetensors",
+    "model.layers.3.mlp.gate_proj.weight": "model-00005-of-00018.safetensors",
+    "model.layers.3.mlp.up_proj.weight": "model-00005-of-00018.safetensors",
+    "model.layers.3.post_attention_layernorm.weight": "model-00005-of-00018.safetensors",
+    "model.layers.3.self_attn.k_proj.bias": "model-00004-of-00018.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00004-of-00018.safetensors",
+    "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00018.safetensors",
+    "model.layers.3.self_attn.q_proj.bias": "model-00004-of-00018.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00004-of-00018.safetensors",
+    "model.layers.3.self_attn.v_proj.bias": "model-00004-of-00018.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00004-of-00018.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00005-of-00018.safetensors",
+    "model.layers.4.mlp.down_proj.weight": "model-00005-of-00018.safetensors",
+    "model.layers.4.mlp.gate_proj.weight": "model-00005-of-00018.safetensors",
+    "model.layers.4.mlp.up_proj.weight": "model-00005-of-00018.safetensors",
+    "model.layers.4.post_attention_layernorm.weight": "model-00005-of-00018.safetensors",
+    "model.layers.4.self_attn.k_proj.bias": "model-00005-of-00018.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00005-of-00018.safetensors",
+    "model.layers.4.self_attn.o_proj.weight": "model-00005-of-00018.safetensors",
+    "model.layers.4.self_attn.q_proj.bias": "model-00005-of-00018.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00005-of-00018.safetensors",
+    "model.layers.4.self_attn.v_proj.bias": "model-00005-of-00018.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00005-of-00018.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00006-of-00018.safetensors",
+    "model.layers.5.mlp.down_proj.weight": "model-00006-of-00018.safetensors",
+    "model.layers.5.mlp.gate_proj.weight": "model-00006-of-00018.safetensors",
+    "model.layers.5.mlp.up_proj.weight": "model-00006-of-00018.safetensors",
+    "model.layers.5.post_attention_layernorm.weight": "model-00006-of-00018.safetensors",
+    "model.layers.5.self_attn.k_proj.bias": "model-00005-of-00018.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00005-of-00018.safetensors",
+    "model.layers.5.self_attn.o_proj.weight": "model-00005-of-00018.safetensors",
+    "model.layers.5.self_attn.q_proj.bias": "model-00005-of-00018.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00005-of-00018.safetensors",
+    "model.layers.5.self_attn.v_proj.bias": "model-00005-of-00018.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00005-of-00018.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00006-of-00018.safetensors",
+    "model.layers.6.mlp.down_proj.weight": "model-00006-of-00018.safetensors",
+    "model.layers.6.mlp.gate_proj.weight": "model-00006-of-00018.safetensors",
+    "model.layers.6.mlp.up_proj.weight": "model-00006-of-00018.safetensors",
+    "model.layers.6.post_attention_layernorm.weight": "model-00006-of-00018.safetensors",
+    "model.layers.6.self_attn.k_proj.bias": "model-00006-of-00018.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00006-of-00018.safetensors",
+    "model.layers.6.self_attn.o_proj.weight": "model-00006-of-00018.safetensors",
+    "model.layers.6.self_attn.q_proj.bias": "model-00006-of-00018.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00006-of-00018.safetensors",
+    "model.layers.6.self_attn.v_proj.bias": "model-00006-of-00018.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00006-of-00018.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00007-of-00018.safetensors",
+    "model.layers.7.mlp.down_proj.weight": "model-00007-of-00018.safetensors",
+    "model.layers.7.mlp.gate_proj.weight": "model-00007-of-00018.safetensors",
+    "model.layers.7.mlp.up_proj.weight": "model-00007-of-00018.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00007-of-00018.safetensors",
+    "model.layers.7.self_attn.k_proj.bias": "model-00006-of-00018.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00006-of-00018.safetensors",
+    "model.layers.7.self_attn.o_proj.weight": "model-00006-of-00018.safetensors",
+    "model.layers.7.self_attn.q_proj.bias": "model-00006-of-00018.safetensors",
+    "model.layers.7.self_attn.q_proj.weight": "model-00006-of-00018.safetensors",
+    "model.layers.7.self_attn.v_proj.bias": "model-00006-of-00018.safetensors",
+    "model.layers.7.self_attn.v_proj.weight": "model-00006-of-00018.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00007-of-00018.safetensors",
+    "model.layers.8.mlp.down_proj.weight": "model-00007-of-00018.safetensors",
+    "model.layers.8.mlp.gate_proj.weight": "model-00007-of-00018.safetensors",
+    "model.layers.8.mlp.up_proj.weight": "model-00007-of-00018.safetensors",
+    "model.layers.8.post_attention_layernorm.weight": "model-00007-of-00018.safetensors",
+    "model.layers.8.self_attn.k_proj.bias": "model-00007-of-00018.safetensors",
+    "model.layers.8.self_attn.k_proj.weight": "model-00007-of-00018.safetensors",
+    "model.layers.8.self_attn.o_proj.weight": "model-00007-of-00018.safetensors",
+    "model.layers.8.self_attn.q_proj.bias": "model-00007-of-00018.safetensors",
+    "model.layers.8.self_attn.q_proj.weight": "model-00007-of-00018.safetensors",
+    "model.layers.8.self_attn.v_proj.bias": "model-00007-of-00018.safetensors",
+    "model.layers.8.self_attn.v_proj.weight": "model-00007-of-00018.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00008-of-00018.safetensors",
+    "model.layers.9.mlp.down_proj.weight": "model-00008-of-00018.safetensors",
+    "model.layers.9.mlp.gate_proj.weight": "model-00008-of-00018.safetensors",
+    "model.layers.9.mlp.up_proj.weight": "model-00008-of-00018.safetensors",
+    "model.layers.9.post_attention_layernorm.weight": "model-00008-of-00018.safetensors",
+    "model.layers.9.self_attn.k_proj.bias": "model-00007-of-00018.safetensors",
+    "model.layers.9.self_attn.k_proj.weight": "model-00007-of-00018.safetensors",
+    "model.layers.9.self_attn.o_proj.weight": "model-00007-of-00018.safetensors",
+    "model.layers.9.self_attn.q_proj.bias": "model-00007-of-00018.safetensors",
+    "model.layers.9.self_attn.q_proj.weight": "model-00007-of-00018.safetensors",
+    "model.layers.9.self_attn.v_proj.bias": "model-00007-of-00018.safetensors",
+    "model.layers.9.self_attn.v_proj.weight": "model-00007-of-00018.safetensors",
+    "model.norm.weight": "model-00018-of-00018.safetensors",
+    "visual.blocks.0.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.0.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.0.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.0.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.0.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.0.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.0.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.0.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.0.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.0.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.0.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.0.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.1.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.1.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.1.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.1.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.1.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.1.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.1.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.1.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.1.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.1.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.1.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.1.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.10.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.10.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.10.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.10.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.10.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.10.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.10.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.10.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.10.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.10.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.10.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.10.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.11.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.11.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.11.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.11.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.11.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.11.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.11.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.11.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.11.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.11.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.11.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.11.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.12.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.12.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.12.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.12.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.12.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.12.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.12.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.12.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.12.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.12.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.12.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.12.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.13.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.13.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.13.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.13.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.13.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.13.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.13.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.13.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.13.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.13.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.13.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.13.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.14.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.14.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.14.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.14.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.14.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.14.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.14.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.14.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.14.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.14.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.14.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.14.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.15.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.15.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.15.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.15.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.15.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.15.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.15.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.15.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.15.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.15.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.15.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.15.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.16.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.16.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.16.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.16.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.16.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.16.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.16.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.16.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.16.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.16.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.16.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.16.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.17.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.17.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.17.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.17.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.17.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.17.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.17.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.17.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.17.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.17.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.17.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.17.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.18.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.18.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.18.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.18.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.18.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.18.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.18.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.18.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.18.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.18.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.18.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.18.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.19.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.19.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.19.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.19.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.19.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.19.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.19.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.19.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.19.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.19.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.19.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.19.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.2.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.2.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.2.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.2.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.2.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.2.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.2.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.2.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.2.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.2.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.2.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.2.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.20.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.20.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.20.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.20.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.20.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.20.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.20.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.20.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.20.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.20.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.20.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.20.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.21.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.21.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.21.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.21.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.21.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.21.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.21.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.21.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.21.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.21.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.21.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.21.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.22.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.22.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.22.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.22.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.22.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.22.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.22.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.22.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.22.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.22.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.22.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.22.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.23.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.23.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.23.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.23.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.23.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.23.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.23.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.23.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.23.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.23.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.23.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.23.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.24.attn.proj.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.24.attn.proj.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.24.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.24.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.24.mlp.fc1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.24.mlp.fc1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.24.mlp.fc2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.24.mlp.fc2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.24.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.24.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.24.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.24.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.25.attn.proj.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.25.attn.proj.weight": "model-00003-of-00018.safetensors",
+    "visual.blocks.25.attn.qkv.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.25.attn.qkv.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.25.mlp.fc1.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.25.mlp.fc1.weight": "model-00003-of-00018.safetensors",
+    "visual.blocks.25.mlp.fc2.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.25.mlp.fc2.weight": "model-00003-of-00018.safetensors",
+    "visual.blocks.25.norm1.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.25.norm1.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.25.norm2.bias": "model-00001-of-00018.safetensors",
+    "visual.blocks.25.norm2.weight": "model-00001-of-00018.safetensors",
+    "visual.blocks.26.attn.proj.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.26.attn.proj.weight": "model-00003-of-00018.safetensors",
+    "visual.blocks.26.attn.qkv.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.26.attn.qkv.weight": "model-00003-of-00018.safetensors",
+    "visual.blocks.26.mlp.fc1.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.26.mlp.fc1.weight": "model-00003-of-00018.safetensors",
+    "visual.blocks.26.mlp.fc2.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.26.mlp.fc2.weight": "model-00003-of-00018.safetensors",
+    "visual.blocks.26.norm1.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.26.norm1.weight": "model-00003-of-00018.safetensors",
+    "visual.blocks.26.norm2.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.26.norm2.weight": "model-00003-of-00018.safetensors",
+    "visual.blocks.27.attn.proj.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.27.attn.proj.weight": "model-00003-of-00018.safetensors",
+    "visual.blocks.27.attn.qkv.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.27.attn.qkv.weight": "model-00003-of-00018.safetensors",
+    "visual.blocks.27.mlp.fc1.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.27.mlp.fc1.weight": "model-00003-of-00018.safetensors",
+    "visual.blocks.27.mlp.fc2.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.27.mlp.fc2.weight": "model-00003-of-00018.safetensors",
+    "visual.blocks.27.norm1.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.27.norm1.weight": "model-00003-of-00018.safetensors",
+    "visual.blocks.27.norm2.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.27.norm2.weight": "model-00003-of-00018.safetensors",
+    "visual.blocks.28.attn.proj.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.28.attn.proj.weight": "model-00003-of-00018.safetensors",
+    "visual.blocks.28.attn.qkv.bias": "model-00003-of-00018.safetensors",
+    "visual.blocks.28.attn.qkv.weight": "model-00003-of-00018.safetensors",
601
+ "visual.blocks.28.mlp.fc1.bias": "model-00003-of-00018.safetensors",
602
+ "visual.blocks.28.mlp.fc1.weight": "model-00003-of-00018.safetensors",
603
+ "visual.blocks.28.mlp.fc2.bias": "model-00003-of-00018.safetensors",
604
+ "visual.blocks.28.mlp.fc2.weight": "model-00003-of-00018.safetensors",
605
+ "visual.blocks.28.norm1.bias": "model-00003-of-00018.safetensors",
606
+ "visual.blocks.28.norm1.weight": "model-00003-of-00018.safetensors",
607
+ "visual.blocks.28.norm2.bias": "model-00003-of-00018.safetensors",
608
+ "visual.blocks.28.norm2.weight": "model-00003-of-00018.safetensors",
609
+ "visual.blocks.29.attn.proj.bias": "model-00003-of-00018.safetensors",
610
+ "visual.blocks.29.attn.proj.weight": "model-00003-of-00018.safetensors",
611
+ "visual.blocks.29.attn.qkv.bias": "model-00003-of-00018.safetensors",
612
+ "visual.blocks.29.attn.qkv.weight": "model-00003-of-00018.safetensors",
613
+ "visual.blocks.29.mlp.fc1.bias": "model-00003-of-00018.safetensors",
614
+ "visual.blocks.29.mlp.fc1.weight": "model-00003-of-00018.safetensors",
615
+ "visual.blocks.29.mlp.fc2.bias": "model-00003-of-00018.safetensors",
616
+ "visual.blocks.29.mlp.fc2.weight": "model-00003-of-00018.safetensors",
617
+ "visual.blocks.29.norm1.bias": "model-00003-of-00018.safetensors",
618
+ "visual.blocks.29.norm1.weight": "model-00003-of-00018.safetensors",
619
+ "visual.blocks.29.norm2.bias": "model-00003-of-00018.safetensors",
620
+ "visual.blocks.29.norm2.weight": "model-00003-of-00018.safetensors",
621
+ "visual.blocks.3.attn.proj.bias": "model-00001-of-00018.safetensors",
622
+ "visual.blocks.3.attn.proj.weight": "model-00001-of-00018.safetensors",
623
+ "visual.blocks.3.attn.qkv.bias": "model-00001-of-00018.safetensors",
624
+ "visual.blocks.3.attn.qkv.weight": "model-00001-of-00018.safetensors",
625
+ "visual.blocks.3.mlp.fc1.bias": "model-00001-of-00018.safetensors",
626
+ "visual.blocks.3.mlp.fc1.weight": "model-00001-of-00018.safetensors",
627
+ "visual.blocks.3.mlp.fc2.bias": "model-00001-of-00018.safetensors",
628
+ "visual.blocks.3.mlp.fc2.weight": "model-00001-of-00018.safetensors",
629
+ "visual.blocks.3.norm1.bias": "model-00001-of-00018.safetensors",
630
+ "visual.blocks.3.norm1.weight": "model-00001-of-00018.safetensors",
631
+ "visual.blocks.3.norm2.bias": "model-00001-of-00018.safetensors",
632
+ "visual.blocks.3.norm2.weight": "model-00001-of-00018.safetensors",
633
+ "visual.blocks.30.attn.proj.bias": "model-00003-of-00018.safetensors",
634
+ "visual.blocks.30.attn.proj.weight": "model-00003-of-00018.safetensors",
635
+ "visual.blocks.30.attn.qkv.bias": "model-00003-of-00018.safetensors",
636
+ "visual.blocks.30.attn.qkv.weight": "model-00003-of-00018.safetensors",
637
+ "visual.blocks.30.mlp.fc1.bias": "model-00003-of-00018.safetensors",
638
+ "visual.blocks.30.mlp.fc1.weight": "model-00003-of-00018.safetensors",
639
+ "visual.blocks.30.mlp.fc2.bias": "model-00003-of-00018.safetensors",
640
+ "visual.blocks.30.mlp.fc2.weight": "model-00003-of-00018.safetensors",
641
+ "visual.blocks.30.norm1.bias": "model-00003-of-00018.safetensors",
642
+ "visual.blocks.30.norm1.weight": "model-00003-of-00018.safetensors",
643
+ "visual.blocks.30.norm2.bias": "model-00003-of-00018.safetensors",
644
+ "visual.blocks.30.norm2.weight": "model-00003-of-00018.safetensors",
645
+ "visual.blocks.31.attn.proj.bias": "model-00003-of-00018.safetensors",
646
+ "visual.blocks.31.attn.proj.weight": "model-00003-of-00018.safetensors",
647
+ "visual.blocks.31.attn.qkv.bias": "model-00003-of-00018.safetensors",
648
+ "visual.blocks.31.attn.qkv.weight": "model-00003-of-00018.safetensors",
649
+ "visual.blocks.31.mlp.fc1.bias": "model-00003-of-00018.safetensors",
650
+ "visual.blocks.31.mlp.fc1.weight": "model-00003-of-00018.safetensors",
651
+ "visual.blocks.31.mlp.fc2.bias": "model-00003-of-00018.safetensors",
652
+ "visual.blocks.31.mlp.fc2.weight": "model-00003-of-00018.safetensors",
653
+ "visual.blocks.31.norm1.bias": "model-00003-of-00018.safetensors",
654
+ "visual.blocks.31.norm1.weight": "model-00003-of-00018.safetensors",
655
+ "visual.blocks.31.norm2.bias": "model-00003-of-00018.safetensors",
656
+ "visual.blocks.31.norm2.weight": "model-00003-of-00018.safetensors",
657
+ "visual.blocks.4.attn.proj.bias": "model-00001-of-00018.safetensors",
658
+ "visual.blocks.4.attn.proj.weight": "model-00001-of-00018.safetensors",
659
+ "visual.blocks.4.attn.qkv.bias": "model-00001-of-00018.safetensors",
660
+ "visual.blocks.4.attn.qkv.weight": "model-00001-of-00018.safetensors",
661
+ "visual.blocks.4.mlp.fc1.bias": "model-00001-of-00018.safetensors",
662
+ "visual.blocks.4.mlp.fc1.weight": "model-00001-of-00018.safetensors",
663
+ "visual.blocks.4.mlp.fc2.bias": "model-00001-of-00018.safetensors",
664
+ "visual.blocks.4.mlp.fc2.weight": "model-00001-of-00018.safetensors",
665
+ "visual.blocks.4.norm1.bias": "model-00001-of-00018.safetensors",
666
+ "visual.blocks.4.norm1.weight": "model-00001-of-00018.safetensors",
667
+ "visual.blocks.4.norm2.bias": "model-00001-of-00018.safetensors",
668
+ "visual.blocks.4.norm2.weight": "model-00001-of-00018.safetensors",
669
+ "visual.blocks.5.attn.proj.bias": "model-00001-of-00018.safetensors",
670
+ "visual.blocks.5.attn.proj.weight": "model-00001-of-00018.safetensors",
671
+ "visual.blocks.5.attn.qkv.bias": "model-00001-of-00018.safetensors",
672
+ "visual.blocks.5.attn.qkv.weight": "model-00001-of-00018.safetensors",
673
+ "visual.blocks.5.mlp.fc1.bias": "model-00001-of-00018.safetensors",
674
+ "visual.blocks.5.mlp.fc1.weight": "model-00001-of-00018.safetensors",
675
+ "visual.blocks.5.mlp.fc2.bias": "model-00001-of-00018.safetensors",
676
+ "visual.blocks.5.mlp.fc2.weight": "model-00001-of-00018.safetensors",
677
+ "visual.blocks.5.norm1.bias": "model-00001-of-00018.safetensors",
678
+ "visual.blocks.5.norm1.weight": "model-00001-of-00018.safetensors",
679
+ "visual.blocks.5.norm2.bias": "model-00001-of-00018.safetensors",
680
+ "visual.blocks.5.norm2.weight": "model-00001-of-00018.safetensors",
681
+ "visual.blocks.6.attn.proj.bias": "model-00001-of-00018.safetensors",
682
+ "visual.blocks.6.attn.proj.weight": "model-00001-of-00018.safetensors",
683
+ "visual.blocks.6.attn.qkv.bias": "model-00001-of-00018.safetensors",
684
+ "visual.blocks.6.attn.qkv.weight": "model-00001-of-00018.safetensors",
685
+ "visual.blocks.6.mlp.fc1.bias": "model-00001-of-00018.safetensors",
686
+ "visual.blocks.6.mlp.fc1.weight": "model-00001-of-00018.safetensors",
687
+ "visual.blocks.6.mlp.fc2.bias": "model-00001-of-00018.safetensors",
688
+ "visual.blocks.6.mlp.fc2.weight": "model-00001-of-00018.safetensors",
689
+ "visual.blocks.6.norm1.bias": "model-00001-of-00018.safetensors",
690
+ "visual.blocks.6.norm1.weight": "model-00001-of-00018.safetensors",
691
+ "visual.blocks.6.norm2.bias": "model-00001-of-00018.safetensors",
692
+ "visual.blocks.6.norm2.weight": "model-00001-of-00018.safetensors",
693
+ "visual.blocks.7.attn.proj.bias": "model-00001-of-00018.safetensors",
694
+ "visual.blocks.7.attn.proj.weight": "model-00001-of-00018.safetensors",
695
+ "visual.blocks.7.attn.qkv.bias": "model-00001-of-00018.safetensors",
696
+ "visual.blocks.7.attn.qkv.weight": "model-00001-of-00018.safetensors",
697
+ "visual.blocks.7.mlp.fc1.bias": "model-00001-of-00018.safetensors",
698
+ "visual.blocks.7.mlp.fc1.weight": "model-00001-of-00018.safetensors",
699
+ "visual.blocks.7.mlp.fc2.bias": "model-00001-of-00018.safetensors",
700
+ "visual.blocks.7.mlp.fc2.weight": "model-00001-of-00018.safetensors",
701
+ "visual.blocks.7.norm1.bias": "model-00001-of-00018.safetensors",
702
+ "visual.blocks.7.norm1.weight": "model-00001-of-00018.safetensors",
703
+ "visual.blocks.7.norm2.bias": "model-00001-of-00018.safetensors",
704
+ "visual.blocks.7.norm2.weight": "model-00001-of-00018.safetensors",
705
+ "visual.blocks.8.attn.proj.bias": "model-00001-of-00018.safetensors",
706
+ "visual.blocks.8.attn.proj.weight": "model-00001-of-00018.safetensors",
707
+ "visual.blocks.8.attn.qkv.bias": "model-00001-of-00018.safetensors",
708
+ "visual.blocks.8.attn.qkv.weight": "model-00001-of-00018.safetensors",
709
+ "visual.blocks.8.mlp.fc1.bias": "model-00001-of-00018.safetensors",
710
+ "visual.blocks.8.mlp.fc1.weight": "model-00001-of-00018.safetensors",
711
+ "visual.blocks.8.mlp.fc2.bias": "model-00001-of-00018.safetensors",
712
+ "visual.blocks.8.mlp.fc2.weight": "model-00001-of-00018.safetensors",
713
+ "visual.blocks.8.norm1.bias": "model-00001-of-00018.safetensors",
714
+ "visual.blocks.8.norm1.weight": "model-00001-of-00018.safetensors",
715
+ "visual.blocks.8.norm2.bias": "model-00001-of-00018.safetensors",
716
+ "visual.blocks.8.norm2.weight": "model-00001-of-00018.safetensors",
717
+ "visual.blocks.9.attn.proj.bias": "model-00001-of-00018.safetensors",
718
+ "visual.blocks.9.attn.proj.weight": "model-00001-of-00018.safetensors",
719
+ "visual.blocks.9.attn.qkv.bias": "model-00001-of-00018.safetensors",
720
+ "visual.blocks.9.attn.qkv.weight": "model-00001-of-00018.safetensors",
721
+ "visual.blocks.9.mlp.fc1.bias": "model-00001-of-00018.safetensors",
722
+ "visual.blocks.9.mlp.fc1.weight": "model-00001-of-00018.safetensors",
723
+ "visual.blocks.9.mlp.fc2.bias": "model-00001-of-00018.safetensors",
724
+ "visual.blocks.9.mlp.fc2.weight": "model-00001-of-00018.safetensors",
725
+ "visual.blocks.9.norm1.bias": "model-00001-of-00018.safetensors",
726
+ "visual.blocks.9.norm1.weight": "model-00001-of-00018.safetensors",
727
+ "visual.blocks.9.norm2.bias": "model-00001-of-00018.safetensors",
728
+ "visual.blocks.9.norm2.weight": "model-00001-of-00018.safetensors",
729
+ "visual.merger.ln_q.bias": "model-00003-of-00018.safetensors",
730
+ "visual.merger.ln_q.weight": "model-00003-of-00018.safetensors",
731
+ "visual.merger.mlp.0.bias": "model-00003-of-00018.safetensors",
732
+ "visual.merger.mlp.0.weight": "model-00003-of-00018.safetensors",
733
+ "visual.merger.mlp.2.bias": "model-00003-of-00018.safetensors",
734
+ "visual.merger.mlp.2.weight": "model-00003-of-00018.safetensors",
735
+ "visual.patch_embed.proj.weight": "model-00001-of-00018.safetensors"
736
+ }
737
+ }
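The mapping that closes here is the standard safetensors sharded-checkpoint index: every parameter name points at the shard file that stores it, which is why adjacent vision blocks can land in different shards (block 25's qkv stays in shard 00001 while its projection moves to 00003). A minimal sketch, using only the standard library and a hypothetical local copy of the merged/ folder, of looking a tensor up in this index:

```python
import json
from collections import Counter

# Hypothetical local path to the merged/ directory from this commit.
INDEX_PATH = "merged/model.safetensors.index.json"

with open(INDEX_PATH) as f:
    index = json.load(f)

weight_map = index["weight_map"]  # {tensor name -> shard filename}

# Which shard holds a given parameter?
name = "visual.merger.mlp.0.weight"
print(name, "->", weight_map[name])  # e.g. model-00003-of-00018.safetensors

# How many tensors live in each shard?
per_shard = Counter(weight_map.values())
for shard, count in sorted(per_shard.items()):
    print(shard, count)
```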
merged/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
1
+ {
2
+ "do_convert_rgb": true,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.48145466,
8
+ 0.4578275,
9
+ 0.40821073
10
+ ],
11
+ "image_processor_type": "Qwen2VLImageProcessor",
12
+ "image_std": [
13
+ 0.26862954,
14
+ 0.26130258,
15
+ 0.27577711
16
+ ],
17
+ "max_pixels": 12845056,
18
+ "merge_size": 2,
19
+ "min_pixels": 3136,
20
+ "patch_size": 14,
21
+ "processor_class": "Qwen2VLProcessor",
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "max_pixels": 12845056,
26
+ "min_pixels": 3136
27
+ },
28
+ "temporal_patch_size": 2
29
+ }
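This preprocessor config fixes the Qwen2-VL image pipeline: CLIP-style normalization statistics, bicubic resampling (resample 3), 14-pixel patches merged 2x2, and a per-image pixel budget between 3136 and 12845056 pixels. The sketch below is an approximation of the resize rule those numbers imply (output dimensions snapped to multiples of patch_size * merge_size = 28, total area clamped to the budget), not the exact Qwen2VLImageProcessor code:

```python
import math

PATCH = 14               # "patch_size"
MERGE = 2                # "merge_size"
FACTOR = PATCH * MERGE   # 28: output height/width are multiples of this
MIN_PIXELS = 3136        # "min_pixels" (56 x 56)
MAX_PIXELS = 12845056    # "max_pixels"

def budgeted_size(height: int, width: int):
    """Snap (height, width) to multiples of FACTOR while keeping the total
    pixel count inside [MIN_PIXELS, MAX_PIXELS]. Approximation only."""
    h = max(FACTOR, round(height / FACTOR) * FACTOR)
    w = max(FACTOR, round(width / FACTOR) * FACTOR)
    if h * w > MAX_PIXELS:            # too many pixels: scale down
        scale = math.sqrt(MAX_PIXELS / (h * w))
        h = max(FACTOR, math.floor(h * scale / FACTOR) * FACTOR)
        w = max(FACTOR, math.floor(w * scale / FACTOR) * FACTOR)
    elif h * w < MIN_PIXELS:          # too few pixels: scale up
        scale = math.sqrt(MIN_PIXELS / (h * w))
        h = math.ceil(h * scale / FACTOR) * FACTOR
        w = math.ceil(w * scale / FACTOR) * FACTOR
    return h, w

print(budgeted_size(1080, 1920))  # already inside the budget -> (1092, 1932)
print(budgeted_size(8000, 8000))  # scaled down to respect max_pixels
```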
merged/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
merged/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:091aa7594dc2fcfbfa06b9e3c22a5f0562ac14f30375c13af7309407a0e67b8a
3
+ size 11420371
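Entries like the three lines above are Git LFS pointer stubs rather than the files themselves: the blob is identified by its sha256 oid and its byte size. A small standard-library sketch of verifying a locally downloaded copy against such a pointer, with the path and expected values copied from the merged/tokenizer.json stub:

```python
import hashlib
import os

# Values from the merged/tokenizer.json LFS pointer shown above.
EXPECTED_OID = "091aa7594dc2fcfbfa06b9e3c22a5f0562ac14f30375c13af7309407a0e67b8a"
EXPECTED_SIZE = 11420371
LOCAL_PATH = "merged/tokenizer.json"  # hypothetical local download

def matches_pointer(path: str, oid: str, size: int) -> bool:
    """Return True if the file's byte size and sha256 match the LFS pointer."""
    if os.path.getsize(path) != size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == oid

print(matches_pointer(LOCAL_PATH, EXPECTED_OID, EXPECTED_SIZE))
```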
merged/tokenizer_config.json ADDED
@@ -0,0 +1,144 @@
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "151646": {
29
+ "content": "<|object_ref_start|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "151647": {
37
+ "content": "<|object_ref_end|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "151648": {
45
+ "content": "<|box_start|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "151649": {
53
+ "content": "<|box_end|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "151650": {
61
+ "content": "<|quad_start|>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "151651": {
69
+ "content": "<|quad_end|>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "151652": {
77
+ "content": "<|vision_start|>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "151653": {
85
+ "content": "<|vision_end|>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "151654": {
93
+ "content": "<|vision_pad|>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "151655": {
101
+ "content": "<|image_pad|>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "151656": {
109
+ "content": "<|video_pad|>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ }
116
+ },
117
+ "additional_special_tokens": [
118
+ "<|im_start|>",
119
+ "<|im_end|>",
120
+ "<|object_ref_start|>",
121
+ "<|object_ref_end|>",
122
+ "<|box_start|>",
123
+ "<|box_end|>",
124
+ "<|quad_start|>",
125
+ "<|quad_end|>",
126
+ "<|vision_start|>",
127
+ "<|vision_end|>",
128
+ "<|vision_pad|>",
129
+ "<|image_pad|>",
130
+ "<|video_pad|>"
131
+ ],
132
+ "bos_token": null,
133
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
134
+ "clean_up_tokenization_spaces": false,
135
+ "eos_token": "<|im_end|>",
136
+ "errors": "replace",
137
+ "model_max_length": 32768,
138
+ "pad_token": "<|endoftext|>",
139
+ "padding_side": "left",
140
+ "processor_class": "Qwen2VLProcessor",
141
+ "split_special_tokens": false,
142
+ "tokenizer_class": "Qwen2Tokenizer",
143
+ "unk_token": null
144
+ }
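The chat_template field above is what turns a message list into the final prompt: it injects a default system turn when none is given, wraps each image or video in <|vision_start|>...<|vision_end|> around the matching pad token, and appends <|im_start|>assistant when a generation prompt is requested. A hedged sketch of exercising it through the tokenizer API; the "merged" path is a placeholder for any directory holding this tokenizer:

```python
from transformers import AutoTokenizer

# Placeholder path: point this at the merged/ folder or the published repo id.
tokenizer = AutoTokenizer.from_pretrained("merged")

messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},  # rendered as <|vision_start|><|image_pad|><|vision_end|>
            {"type": "text", "text": "How far apart are the two chairs?"},
        ],
    }
]

prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
# Starts with the injected system turn and ends with "<|im_start|>assistant\n".
```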
merged/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
spaceqwen2-7b-instruct/checkpoint-1446/README.md ADDED
@@ -0,0 +1,202 @@
1
+ ---
2
+ base_model: Qwen/Qwen2-VL-7B-Instruct
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.13.2
spaceqwen2-7b-instruct/checkpoint-1446/adapter_config.json ADDED
@@ -0,0 +1,36 @@
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Qwen/Qwen2-VL-7B-Instruct",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 256,
14
+ "lora_dropout": 0.05,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 128,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "c_fc",
24
+ "out_proj",
25
+ "c_attn",
26
+ "v_proj",
27
+ "w1",
28
+ "attn.c_proj",
29
+ "q_proj",
30
+ "in_proj",
31
+ "w2"
32
+ ],
33
+ "task_type": "CAUSAL_LM",
34
+ "use_dora": false,
35
+ "use_rslora": false
36
+ }
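This adapter_config.json records the LoRA recipe for the checkpoint: rank 128 with alpha 256 (an effective scaling of alpha/r = 2), 5% dropout, and adapters injected into both the language-side projections (q_proj, v_proj) and the vision/merger modules. The sketch below mirrors it as a peft.LoraConfig and shows one way to attach the saved adapter to the base model; it is illustrative, not the original training script:

```python
from peft import LoraConfig, PeftModel
from transformers import Qwen2VLForConditionalGeneration

# Mirrors the adapter_config.json above (useful with peft.get_peft_model for retraining).
lora_config = LoraConfig(
    r=128,
    lora_alpha=256,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "c_fc", "out_proj", "c_attn", "v_proj", "w1",
        "attn.c_proj", "q_proj", "in_proj", "w2",
    ],
)

# Attaching the trained adapter weights on top of the base model:
base = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
)
model = PeftModel.from_pretrained(
    base, "spaceqwen2-7b-instruct/checkpoint-1446"  # checkpoint path from this commit
)
model = model.merge_and_unload()  # optional: bake the LoRA into the base weights
```

Merging and unloading the adapter is likely how the full-weight merged/ folder in this commit was produced, although the commit itself does not state that.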
spaceqwen2-7b-instruct/checkpoint-1446/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2f6d901249a8f7f4dddac1585918f94e82452a47ef072f08235053d9d72c6f8
3
+ size 161495976
spaceqwen2-7b-instruct/checkpoint-1446/added_tokens.json ADDED
@@ -0,0 +1,16 @@
1
+ {
2
+ "<|box_end|>": 151649,
3
+ "<|box_start|>": 151648,
4
+ "<|endoftext|>": 151643,
5
+ "<|im_end|>": 151645,
6
+ "<|im_start|>": 151644,
7
+ "<|image_pad|>": 151655,
8
+ "<|object_ref_end|>": 151647,
9
+ "<|object_ref_start|>": 151646,
10
+ "<|quad_end|>": 151651,
11
+ "<|quad_start|>": 151650,
12
+ "<|video_pad|>": 151656,
13
+ "<|vision_end|>": 151653,
14
+ "<|vision_pad|>": 151654,
15
+ "<|vision_start|>": 151652
16
+ }
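added_tokens.json pins the vocabulary ids of the special tokens listed alongside it in special_tokens_map.json. A quick check, assuming a local copy of this checkpoint directory, that the tokenizer round-trips those ids:

```python
from transformers import AutoTokenizer

# Placeholder: any local copy of this checkpoint directory.
tok = AutoTokenizer.from_pretrained("spaceqwen2-7b-instruct/checkpoint-1446")

# added_tokens.json simply records the ids assigned to the special tokens above.
assert tok.convert_tokens_to_ids("<|image_pad|>") == 151655
assert tok.convert_tokens_to_ids("<|vision_start|>") == 151652
print(tok.decode([151652, 151655, 151653]))  # <|vision_start|><|image_pad|><|vision_end|>
```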
spaceqwen2-7b-instruct/checkpoint-1446/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
spaceqwen2-7b-instruct/checkpoint-1446/optimizer.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:017b1a844ad5e6a26f0a10d939c641401e8f11e4ab041c9341d01033d6b7e325
3
+ size 323057658
spaceqwen2-7b-instruct/checkpoint-1446/rng_state.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:217138a14785e037c38e00a03bb1ee43fcd930247ed3108e9dac78d49161321b
3
+ size 14244
spaceqwen2-7b-instruct/checkpoint-1446/scheduler.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52b57148a9f9ce541f709b3949c7b747c6ca8d25300f8462fbdb8e0195d1862a
3
+ size 1064
spaceqwen2-7b-instruct/checkpoint-1446/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
spaceqwen2-7b-instruct/checkpoint-1446/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88a3a6fcb80132f76da8aa40cdc3fccd7e5d8468ef15421f5b0c2715e85217d2
3
+ size 11420538
spaceqwen2-7b-instruct/checkpoint-1446/tokenizer_config.json ADDED
@@ -0,0 +1,143 @@
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "151646": {
29
+ "content": "<|object_ref_start|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "151647": {
37
+ "content": "<|object_ref_end|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "151648": {
45
+ "content": "<|box_start|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "151649": {
53
+ "content": "<|box_end|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "151650": {
61
+ "content": "<|quad_start|>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "151651": {
69
+ "content": "<|quad_end|>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "151652": {
77
+ "content": "<|vision_start|>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "151653": {
85
+ "content": "<|vision_end|>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "151654": {
93
+ "content": "<|vision_pad|>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "151655": {
101
+ "content": "<|image_pad|>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "151656": {
109
+ "content": "<|video_pad|>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ }
116
+ },
117
+ "additional_special_tokens": [
118
+ "<|im_start|>",
119
+ "<|im_end|>",
120
+ "<|object_ref_start|>",
121
+ "<|object_ref_end|>",
122
+ "<|box_start|>",
123
+ "<|box_end|>",
124
+ "<|quad_start|>",
125
+ "<|quad_end|>",
126
+ "<|vision_start|>",
127
+ "<|vision_end|>",
128
+ "<|vision_pad|>",
129
+ "<|image_pad|>",
130
+ "<|video_pad|>"
131
+ ],
132
+ "bos_token": null,
133
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
134
+ "clean_up_tokenization_spaces": false,
135
+ "eos_token": "<|im_end|>",
136
+ "errors": "replace",
137
+ "model_max_length": 32768,
138
+ "pad_token": "<|endoftext|>",
139
+ "padding_side": "left",
140
+ "split_special_tokens": false,
141
+ "tokenizer_class": "Qwen2Tokenizer",
142
+ "unk_token": null
143
+ }
spaceqwen2-7b-instruct/checkpoint-1446/trainer_state.json ADDED
@@ -0,0 +1,2056 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.9996542783059637,
5
+ "eval_steps": 500,
6
+ "global_step": 1446,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.006914433880726016,
13
+ "grad_norm": 25.907793045043945,
14
+ "learning_rate": 2e-05,
15
+ "loss": 25.3491,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 0.013828867761452032,
20
+ "grad_norm": 17.689342498779297,
21
+ "learning_rate": 2e-05,
22
+ "loss": 21.7905,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.020743301642178046,
27
+ "grad_norm": 17.454442977905273,
28
+ "learning_rate": 2e-05,
29
+ "loss": 19.3267,
30
+ "step": 15
31
+ },
32
+ {
33
+ "epoch": 0.027657735522904063,
34
+ "grad_norm": 14.737072944641113,
35
+ "learning_rate": 2e-05,
36
+ "loss": 17.0647,
37
+ "step": 20
38
+ },
39
+ {
40
+ "epoch": 0.03457216940363008,
41
+ "grad_norm": 8.00361442565918,
42
+ "learning_rate": 2e-05,
43
+ "loss": 15.7952,
44
+ "step": 25
45
+ },
46
+ {
47
+ "epoch": 0.04148660328435609,
48
+ "grad_norm": 8.17756175994873,
49
+ "learning_rate": 2e-05,
50
+ "loss": 15.1069,
51
+ "step": 30
52
+ },
53
+ {
54
+ "epoch": 0.04840103716508211,
55
+ "grad_norm": 9.675758361816406,
56
+ "learning_rate": 2e-05,
57
+ "loss": 14.4795,
58
+ "step": 35
59
+ },
60
+ {
61
+ "epoch": 0.055315471045808126,
62
+ "grad_norm": 7.8849616050720215,
63
+ "learning_rate": 2e-05,
64
+ "loss": 13.559,
65
+ "step": 40
66
+ },
67
+ {
68
+ "epoch": 0.06222990492653414,
69
+ "grad_norm": 8.653643608093262,
70
+ "learning_rate": 2e-05,
71
+ "loss": 13.1711,
72
+ "step": 45
73
+ },
74
+ {
75
+ "epoch": 0.06914433880726016,
76
+ "grad_norm": 9.694263458251953,
77
+ "learning_rate": 2e-05,
78
+ "loss": 12.4436,
79
+ "step": 50
80
+ },
81
+ {
82
+ "epoch": 0.07605877268798616,
83
+ "grad_norm": 9.167488098144531,
84
+ "learning_rate": 2e-05,
85
+ "loss": 12.0127,
86
+ "step": 55
87
+ },
88
+ {
89
+ "epoch": 0.08297320656871218,
90
+ "grad_norm": 9.45940113067627,
91
+ "learning_rate": 2e-05,
92
+ "loss": 11.6855,
93
+ "step": 60
94
+ },
95
+ {
96
+ "epoch": 0.0898876404494382,
97
+ "grad_norm": 11.105853080749512,
98
+ "learning_rate": 2e-05,
99
+ "loss": 11.2729,
100
+ "step": 65
101
+ },
102
+ {
103
+ "epoch": 0.09680207433016422,
104
+ "grad_norm": 10.929551124572754,
105
+ "learning_rate": 2e-05,
106
+ "loss": 10.7518,
107
+ "step": 70
108
+ },
109
+ {
110
+ "epoch": 0.10371650821089023,
111
+ "grad_norm": 15.473309516906738,
112
+ "learning_rate": 2e-05,
113
+ "loss": 10.7451,
114
+ "step": 75
115
+ },
116
+ {
117
+ "epoch": 0.11063094209161625,
118
+ "grad_norm": 17.457305908203125,
119
+ "learning_rate": 2e-05,
120
+ "loss": 10.3165,
121
+ "step": 80
122
+ },
123
+ {
124
+ "epoch": 0.11754537597234227,
125
+ "grad_norm": 20.109453201293945,
126
+ "learning_rate": 2e-05,
127
+ "loss": 9.7337,
128
+ "step": 85
129
+ },
130
+ {
131
+ "epoch": 0.12445980985306827,
132
+ "grad_norm": 16.452909469604492,
133
+ "learning_rate": 2e-05,
134
+ "loss": 9.0107,
135
+ "step": 90
136
+ },
137
+ {
138
+ "epoch": 0.1313742437337943,
139
+ "grad_norm": 13.92202377319336,
140
+ "learning_rate": 2e-05,
141
+ "loss": 8.9706,
142
+ "step": 95
143
+ },
144
+ {
145
+ "epoch": 0.13828867761452032,
146
+ "grad_norm": 14.07268238067627,
147
+ "learning_rate": 2e-05,
148
+ "loss": 8.746,
149
+ "step": 100
150
+ },
151
+ {
152
+ "epoch": 0.14520311149524634,
153
+ "grad_norm": 13.464680671691895,
154
+ "learning_rate": 2e-05,
155
+ "loss": 9.0666,
156
+ "step": 105
157
+ },
158
+ {
159
+ "epoch": 0.15211754537597233,
160
+ "grad_norm": 13.252619743347168,
161
+ "learning_rate": 2e-05,
162
+ "loss": 8.2523,
163
+ "step": 110
164
+ },
165
+ {
166
+ "epoch": 0.15903197925669835,
167
+ "grad_norm": 16.04055404663086,
168
+ "learning_rate": 2e-05,
169
+ "loss": 8.3208,
170
+ "step": 115
171
+ },
172
+ {
173
+ "epoch": 0.16594641313742436,
174
+ "grad_norm": 23.16919708251953,
175
+ "learning_rate": 2e-05,
176
+ "loss": 7.9612,
177
+ "step": 120
178
+ },
179
+ {
180
+ "epoch": 0.17286084701815038,
181
+ "grad_norm": 22.345563888549805,
182
+ "learning_rate": 2e-05,
183
+ "loss": 8.1263,
184
+ "step": 125
185
+ },
186
+ {
187
+ "epoch": 0.1797752808988764,
188
+ "grad_norm": 14.41594123840332,
189
+ "learning_rate": 2e-05,
190
+ "loss": 7.7958,
191
+ "step": 130
192
+ },
193
+ {
194
+ "epoch": 0.18668971477960242,
195
+ "grad_norm": 16.455421447753906,
196
+ "learning_rate": 2e-05,
197
+ "loss": 7.7192,
198
+ "step": 135
199
+ },
200
+ {
201
+ "epoch": 0.19360414866032843,
202
+ "grad_norm": 15.813484191894531,
203
+ "learning_rate": 2e-05,
204
+ "loss": 7.7602,
205
+ "step": 140
206
+ },
207
+ {
208
+ "epoch": 0.20051858254105445,
209
+ "grad_norm": 16.902507781982422,
210
+ "learning_rate": 2e-05,
211
+ "loss": 7.618,
212
+ "step": 145
213
+ },
214
+ {
215
+ "epoch": 0.20743301642178047,
216
+ "grad_norm": 20.566701889038086,
217
+ "learning_rate": 2e-05,
218
+ "loss": 7.668,
219
+ "step": 150
220
+ },
221
+ {
222
+ "epoch": 0.2143474503025065,
223
+ "grad_norm": 15.668231964111328,
224
+ "learning_rate": 2e-05,
225
+ "loss": 7.6088,
226
+ "step": 155
227
+ },
228
+ {
229
+ "epoch": 0.2212618841832325,
230
+ "grad_norm": 15.046396255493164,
231
+ "learning_rate": 2e-05,
232
+ "loss": 7.4022,
233
+ "step": 160
234
+ },
235
+ {
236
+ "epoch": 0.22817631806395852,
237
+ "grad_norm": 15.979242324829102,
238
+ "learning_rate": 2e-05,
239
+ "loss": 7.5034,
240
+ "step": 165
241
+ },
242
+ {
243
+ "epoch": 0.23509075194468454,
244
+ "grad_norm": 14.038537979125977,
245
+ "learning_rate": 2e-05,
246
+ "loss": 7.517,
247
+ "step": 170
248
+ },
249
+ {
250
+ "epoch": 0.24200518582541056,
251
+ "grad_norm": 14.610211372375488,
252
+ "learning_rate": 2e-05,
253
+ "loss": 7.3982,
254
+ "step": 175
255
+ },
256
+ {
257
+ "epoch": 0.24891961970613655,
258
+ "grad_norm": 17.241483688354492,
259
+ "learning_rate": 2e-05,
260
+ "loss": 7.2793,
261
+ "step": 180
262
+ },
263
+ {
264
+ "epoch": 0.25583405358686256,
265
+ "grad_norm": 15.71363639831543,
266
+ "learning_rate": 2e-05,
267
+ "loss": 7.2521,
268
+ "step": 185
269
+ },
270
+ {
271
+ "epoch": 0.2627484874675886,
272
+ "grad_norm": 16.244775772094727,
273
+ "learning_rate": 2e-05,
274
+ "loss": 7.1932,
275
+ "step": 190
276
+ },
277
+ {
278
+ "epoch": 0.2696629213483146,
279
+ "grad_norm": 15.033608436584473,
280
+ "learning_rate": 2e-05,
281
+ "loss": 7.1747,
282
+ "step": 195
283
+ },
284
+ {
285
+ "epoch": 0.27657735522904064,
286
+ "grad_norm": 16.86004066467285,
287
+ "learning_rate": 2e-05,
288
+ "loss": 7.0946,
289
+ "step": 200
290
+ },
291
+ {
292
+ "epoch": 0.28349178910976663,
293
+ "grad_norm": 15.670988082885742,
294
+ "learning_rate": 2e-05,
295
+ "loss": 7.0308,
296
+ "step": 205
297
+ },
298
+ {
299
+ "epoch": 0.2904062229904927,
300
+ "grad_norm": 17.024839401245117,
301
+ "learning_rate": 2e-05,
302
+ "loss": 7.0582,
303
+ "step": 210
304
+ },
305
+ {
306
+ "epoch": 0.29732065687121867,
307
+ "grad_norm": 16.537487030029297,
308
+ "learning_rate": 2e-05,
309
+ "loss": 6.9094,
310
+ "step": 215
311
+ },
312
+ {
313
+ "epoch": 0.30423509075194466,
314
+ "grad_norm": 12.021300315856934,
315
+ "learning_rate": 2e-05,
316
+ "loss": 7.0259,
317
+ "step": 220
318
+ },
319
+ {
320
+ "epoch": 0.3111495246326707,
321
+ "grad_norm": 14.021350860595703,
322
+ "learning_rate": 2e-05,
323
+ "loss": 7.0716,
324
+ "step": 225
325
+ },
326
+ {
327
+ "epoch": 0.3180639585133967,
328
+ "grad_norm": 15.873709678649902,
329
+ "learning_rate": 2e-05,
330
+ "loss": 6.8763,
331
+ "step": 230
332
+ },
333
+ {
334
+ "epoch": 0.32497839239412274,
335
+ "grad_norm": 15.506514549255371,
336
+ "learning_rate": 2e-05,
337
+ "loss": 6.8865,
338
+ "step": 235
339
+ },
340
+ {
341
+ "epoch": 0.33189282627484873,
342
+ "grad_norm": 15.858102798461914,
343
+ "learning_rate": 2e-05,
344
+ "loss": 6.8591,
345
+ "step": 240
346
+ },
347
+ {
348
+ "epoch": 0.3388072601555748,
349
+ "grad_norm": 15.617164611816406,
350
+ "learning_rate": 2e-05,
351
+ "loss": 6.8315,
352
+ "step": 245
353
+ },
354
+ {
355
+ "epoch": 0.34572169403630076,
356
+ "grad_norm": 14.009385108947754,
357
+ "learning_rate": 2e-05,
358
+ "loss": 6.8707,
359
+ "step": 250
360
+ },
361
+ {
362
+ "epoch": 0.3526361279170268,
363
+ "grad_norm": 12.869144439697266,
364
+ "learning_rate": 2e-05,
365
+ "loss": 6.7915,
366
+ "step": 255
367
+ },
368
+ {
369
+ "epoch": 0.3595505617977528,
370
+ "grad_norm": 16.024009704589844,
371
+ "learning_rate": 2e-05,
372
+ "loss": 6.7395,
373
+ "step": 260
374
+ },
375
+ {
376
+ "epoch": 0.36646499567847884,
377
+ "grad_norm": 14.30862808227539,
378
+ "learning_rate": 2e-05,
379
+ "loss": 6.6352,
380
+ "step": 265
381
+ },
382
+ {
383
+ "epoch": 0.37337942955920483,
384
+ "grad_norm": 16.559965133666992,
385
+ "learning_rate": 2e-05,
386
+ "loss": 6.6893,
387
+ "step": 270
388
+ },
389
+ {
390
+ "epoch": 0.3802938634399309,
391
+ "grad_norm": 12.408181190490723,
392
+ "learning_rate": 2e-05,
393
+ "loss": 6.5961,
394
+ "step": 275
395
+ },
396
+ {
397
+ "epoch": 0.38720829732065687,
398
+ "grad_norm": 18.477039337158203,
399
+ "learning_rate": 2e-05,
400
+ "loss": 6.6104,
401
+ "step": 280
402
+ },
403
+ {
404
+ "epoch": 0.3941227312013829,
405
+ "grad_norm": 14.633271217346191,
406
+ "learning_rate": 2e-05,
407
+ "loss": 6.6158,
408
+ "step": 285
409
+ },
410
+ {
411
+ "epoch": 0.4010371650821089,
412
+ "grad_norm": 13.507740020751953,
413
+ "learning_rate": 2e-05,
414
+ "loss": 6.5376,
415
+ "step": 290
416
+ },
417
+ {
418
+ "epoch": 0.4079515989628349,
419
+ "grad_norm": 13.511082649230957,
420
+ "learning_rate": 2e-05,
421
+ "loss": 6.5213,
422
+ "step": 295
423
+ },
424
+ {
425
+ "epoch": 0.41486603284356094,
426
+ "grad_norm": 13.532166481018066,
427
+ "learning_rate": 2e-05,
428
+ "loss": 6.4779,
429
+ "step": 300
430
+ },
431
+ {
432
+ "epoch": 0.42178046672428693,
433
+ "grad_norm": 14.193647384643555,
434
+ "learning_rate": 2e-05,
435
+ "loss": 6.725,
436
+ "step": 305
437
+ },
438
+ {
439
+ "epoch": 0.428694900605013,
440
+ "grad_norm": 14.277159690856934,
441
+ "learning_rate": 2e-05,
442
+ "loss": 6.7683,
443
+ "step": 310
444
+ },
445
+ {
446
+ "epoch": 0.43560933448573896,
447
+ "grad_norm": 15.155255317687988,
448
+ "learning_rate": 2e-05,
449
+ "loss": 6.4752,
450
+ "step": 315
451
+ },
452
+ {
453
+ "epoch": 0.442523768366465,
454
+ "grad_norm": 14.28928279876709,
455
+ "learning_rate": 2e-05,
456
+ "loss": 6.6109,
457
+ "step": 320
458
+ },
459
+ {
460
+ "epoch": 0.449438202247191,
461
+ "grad_norm": 13.727865219116211,
462
+ "learning_rate": 2e-05,
463
+ "loss": 6.7241,
464
+ "step": 325
465
+ },
466
+ {
467
+ "epoch": 0.45635263612791704,
468
+ "grad_norm": 13.583052635192871,
469
+ "learning_rate": 2e-05,
470
+ "loss": 6.349,
471
+ "step": 330
472
+ },
473
+ {
474
+ "epoch": 0.46326707000864303,
475
+ "grad_norm": 14.269885063171387,
476
+ "learning_rate": 2e-05,
477
+ "loss": 6.3624,
478
+ "step": 335
479
+ },
480
+ {
481
+ "epoch": 0.4701815038893691,
482
+ "grad_norm": 11.396374702453613,
483
+ "learning_rate": 2e-05,
484
+ "loss": 6.4641,
485
+ "step": 340
486
+ },
487
+ {
488
+ "epoch": 0.47709593777009507,
489
+ "grad_norm": 14.38582706451416,
490
+ "learning_rate": 2e-05,
491
+ "loss": 6.5905,
492
+ "step": 345
493
+ },
494
+ {
495
+ "epoch": 0.4840103716508211,
496
+ "grad_norm": 11.609393119812012,
497
+ "learning_rate": 2e-05,
498
+ "loss": 6.3706,
499
+ "step": 350
500
+ },
501
+ {
502
+ "epoch": 0.4909248055315471,
503
+ "grad_norm": 13.301101684570312,
504
+ "learning_rate": 2e-05,
505
+ "loss": 6.5634,
506
+ "step": 355
507
+ },
508
+ {
509
+ "epoch": 0.4978392394122731,
510
+ "grad_norm": 14.179442405700684,
511
+ "learning_rate": 2e-05,
512
+ "loss": 6.3023,
513
+ "step": 360
514
+ },
515
+ {
516
+ "epoch": 0.5047536732929991,
517
+ "grad_norm": 13.965949058532715,
518
+ "learning_rate": 2e-05,
519
+ "loss": 6.5212,
520
+ "step": 365
521
+ },
522
+ {
523
+ "epoch": 0.5116681071737251,
524
+ "grad_norm": 13.542827606201172,
525
+ "learning_rate": 2e-05,
526
+ "loss": 6.1763,
527
+ "step": 370
528
+ },
529
+ {
530
+ "epoch": 0.5185825410544511,
531
+ "grad_norm": 16.06488800048828,
532
+ "learning_rate": 2e-05,
533
+ "loss": 6.4158,
534
+ "step": 375
535
+ },
536
+ {
537
+ "epoch": 0.5254969749351772,
538
+ "grad_norm": 15.495418548583984,
539
+ "learning_rate": 2e-05,
540
+ "loss": 6.454,
541
+ "step": 380
542
+ },
543
+ {
544
+ "epoch": 0.5324114088159032,
545
+ "grad_norm": 10.823579788208008,
546
+ "learning_rate": 2e-05,
547
+ "loss": 6.1206,
548
+ "step": 385
549
+ },
550
+ {
551
+ "epoch": 0.5393258426966292,
552
+ "grad_norm": 12.992563247680664,
553
+ "learning_rate": 2e-05,
554
+ "loss": 6.2743,
555
+ "step": 390
556
+ },
557
+ {
558
+ "epoch": 0.5462402765773552,
559
+ "grad_norm": 10.909781455993652,
560
+ "learning_rate": 2e-05,
561
+ "loss": 6.2523,
562
+ "step": 395
563
+ },
564
+ {
565
+ "epoch": 0.5531547104580813,
566
+ "grad_norm": 11.445661544799805,
567
+ "learning_rate": 2e-05,
568
+ "loss": 6.3599,
569
+ "step": 400
570
+ },
571
+ {
572
+ "epoch": 0.5600691443388073,
573
+ "grad_norm": 10.06051254272461,
574
+ "learning_rate": 2e-05,
575
+ "loss": 6.3006,
576
+ "step": 405
577
+ },
578
+ {
579
+ "epoch": 0.5669835782195333,
580
+ "grad_norm": 15.478804588317871,
581
+ "learning_rate": 2e-05,
582
+ "loss": 6.3255,
583
+ "step": 410
584
+ },
585
+ {
586
+ "epoch": 0.5738980121002593,
587
+ "grad_norm": 10.99301528930664,
588
+ "learning_rate": 2e-05,
589
+ "loss": 6.3989,
590
+ "step": 415
591
+ },
592
+ {
593
+ "epoch": 0.5808124459809854,
594
+ "grad_norm": 12.08956527709961,
595
+ "learning_rate": 2e-05,
596
+ "loss": 6.2625,
597
+ "step": 420
598
+ },
599
+ {
600
+ "epoch": 0.5877268798617113,
601
+ "grad_norm": 11.254467010498047,
602
+ "learning_rate": 2e-05,
603
+ "loss": 6.0973,
604
+ "step": 425
605
+ },
606
+ {
607
+ "epoch": 0.5946413137424373,
608
+ "grad_norm": 11.048693656921387,
609
+ "learning_rate": 2e-05,
610
+ "loss": 6.2117,
611
+ "step": 430
612
+ },
613
+ {
614
+ "epoch": 0.6015557476231633,
615
+ "grad_norm": 12.01796817779541,
616
+ "learning_rate": 2e-05,
617
+ "loss": 6.3143,
618
+ "step": 435
619
+ },
620
+ {
621
+ "epoch": 0.6084701815038893,
622
+ "grad_norm": 12.601286888122559,
623
+ "learning_rate": 2e-05,
624
+ "loss": 6.1834,
625
+ "step": 440
626
+ },
627
+ {
628
+ "epoch": 0.6153846153846154,
629
+ "grad_norm": 14.185168266296387,
630
+ "learning_rate": 2e-05,
631
+ "loss": 6.2586,
632
+ "step": 445
633
+ },
634
+ {
635
+ "epoch": 0.6222990492653414,
636
+ "grad_norm": 13.799695014953613,
637
+ "learning_rate": 2e-05,
638
+ "loss": 6.1915,
639
+ "step": 450
640
+ },
641
+ {
642
+ "epoch": 0.6292134831460674,
643
+ "grad_norm": 10.885003089904785,
644
+ "learning_rate": 2e-05,
645
+ "loss": 6.119,
646
+ "step": 455
647
+ },
648
+ {
649
+ "epoch": 0.6361279170267934,
650
+ "grad_norm": 12.816555976867676,
651
+ "learning_rate": 2e-05,
652
+ "loss": 6.1175,
653
+ "step": 460
654
+ },
655
+ {
656
+ "epoch": 0.6430423509075195,
657
+ "grad_norm": 11.098769187927246,
658
+ "learning_rate": 2e-05,
659
+ "loss": 6.2456,
660
+ "step": 465
661
+ },
662
+ {
663
+ "epoch": 0.6499567847882455,
664
+ "grad_norm": 12.00403881072998,
665
+ "learning_rate": 2e-05,
666
+ "loss": 6.137,
667
+ "step": 470
668
+ },
669
+ {
670
+ "epoch": 0.6568712186689715,
671
+ "grad_norm": 10.812249183654785,
672
+ "learning_rate": 2e-05,
673
+ "loss": 6.2851,
674
+ "step": 475
675
+ },
676
+ {
677
+ "epoch": 0.6637856525496975,
678
+ "grad_norm": 11.194480895996094,
679
+ "learning_rate": 2e-05,
680
+ "loss": 5.9929,
681
+ "step": 480
682
+ },
683
+ {
684
+ "epoch": 0.6707000864304236,
685
+ "grad_norm": 11.515169143676758,
686
+ "learning_rate": 2e-05,
687
+ "loss": 6.1984,
688
+ "step": 485
689
+ },
690
+ {
691
+ "epoch": 0.6776145203111495,
692
+ "grad_norm": 9.344099044799805,
693
+ "learning_rate": 2e-05,
694
+ "loss": 6.2658,
695
+ "step": 490
696
+ },
697
+ {
698
+ "epoch": 0.6845289541918755,
699
+ "grad_norm": 11.943121910095215,
700
+ "learning_rate": 2e-05,
701
+ "loss": 6.0862,
702
+ "step": 495
703
+ },
704
+ {
705
+ "epoch": 0.6914433880726015,
706
+ "grad_norm": 12.303791999816895,
707
+ "learning_rate": 2e-05,
708
+ "loss": 5.9433,
709
+ "step": 500
710
+ },
711
+ {
712
+ "epoch": 0.6983578219533275,
713
+ "grad_norm": 11.356282234191895,
714
+ "learning_rate": 2e-05,
715
+ "loss": 5.9841,
716
+ "step": 505
717
+ },
718
+ {
719
+ "epoch": 0.7052722558340536,
720
+ "grad_norm": 9.59524154663086,
721
+ "learning_rate": 2e-05,
722
+ "loss": 6.1749,
723
+ "step": 510
724
+ },
725
+ {
726
+ "epoch": 0.7121866897147796,
727
+ "grad_norm": 11.054985046386719,
728
+ "learning_rate": 2e-05,
729
+ "loss": 6.0718,
730
+ "step": 515
731
+ },
732
+ {
733
+ "epoch": 0.7191011235955056,
734
+ "grad_norm": 10.020822525024414,
735
+ "learning_rate": 2e-05,
736
+ "loss": 6.2672,
737
+ "step": 520
738
+ },
739
+ {
740
+ "epoch": 0.7260155574762316,
741
+ "grad_norm": 10.917634010314941,
742
+ "learning_rate": 2e-05,
743
+ "loss": 6.0429,
744
+ "step": 525
745
+ },
746
+ {
747
+ "epoch": 0.7329299913569577,
748
+ "grad_norm": 12.873083114624023,
749
+ "learning_rate": 2e-05,
750
+ "loss": 6.1324,
751
+ "step": 530
752
+ },
753
+ {
754
+ "epoch": 0.7398444252376837,
755
+ "grad_norm": 10.587157249450684,
756
+ "learning_rate": 2e-05,
757
+ "loss": 6.0654,
758
+ "step": 535
759
+ },
760
+ {
761
+ "epoch": 0.7467588591184097,
762
+ "grad_norm": 11.162230491638184,
763
+ "learning_rate": 2e-05,
764
+ "loss": 6.2684,
765
+ "step": 540
766
+ },
767
+ {
768
+ "epoch": 0.7536732929991357,
769
+ "grad_norm": 10.465380668640137,
770
+ "learning_rate": 2e-05,
771
+ "loss": 6.0308,
772
+ "step": 545
773
+ },
774
+ {
775
+ "epoch": 0.7605877268798618,
776
+ "grad_norm": 11.623172760009766,
777
+ "learning_rate": 2e-05,
778
+ "loss": 5.9999,
779
+ "step": 550
780
+ },
781
+ {
782
+ "epoch": 0.7675021607605877,
783
+ "grad_norm": 10.432977676391602,
784
+ "learning_rate": 2e-05,
785
+ "loss": 5.9707,
786
+ "step": 555
787
+ },
788
+ {
789
+ "epoch": 0.7744165946413137,
790
+ "grad_norm": 10.333474159240723,
791
+ "learning_rate": 2e-05,
792
+ "loss": 5.9559,
793
+ "step": 560
794
+ },
795
+ {
796
+ "epoch": 0.7813310285220397,
797
+ "grad_norm": 9.531599044799805,
798
+ "learning_rate": 2e-05,
799
+ "loss": 6.0119,
800
+ "step": 565
801
+ },
802
+ {
803
+ "epoch": 0.7882454624027658,
804
+ "grad_norm": 9.38166332244873,
805
+ "learning_rate": 2e-05,
806
+ "loss": 6.1793,
807
+ "step": 570
808
+ },
809
+ {
810
+ "epoch": 0.7951598962834918,
811
+ "grad_norm": 10.807636260986328,
812
+ "learning_rate": 2e-05,
813
+ "loss": 6.0544,
814
+ "step": 575
815
+ },
816
+ {
817
+ "epoch": 0.8020743301642178,
818
+ "grad_norm": 11.60075855255127,
819
+ "learning_rate": 2e-05,
820
+ "loss": 5.9238,
821
+ "step": 580
822
+ },
823
+ {
824
+ "epoch": 0.8089887640449438,
825
+ "grad_norm": 10.713749885559082,
826
+ "learning_rate": 2e-05,
827
+ "loss": 6.157,
828
+ "step": 585
829
+ },
830
+ {
831
+ "epoch": 0.8159031979256698,
832
+ "grad_norm": 9.57898998260498,
833
+ "learning_rate": 2e-05,
834
+ "loss": 6.2136,
835
+ "step": 590
836
+ },
837
+ {
838
+ "epoch": 0.8228176318063959,
839
+ "grad_norm": 10.713813781738281,
840
+ "learning_rate": 2e-05,
841
+ "loss": 6.0313,
842
+ "step": 595
843
+ },
844
+ {
845
+ "epoch": 0.8297320656871219,
846
+ "grad_norm": 10.92052173614502,
847
+ "learning_rate": 2e-05,
848
+ "loss": 6.0788,
849
+ "step": 600
850
+ },
851
+ {
852
+ "epoch": 0.8366464995678479,
853
+ "grad_norm": 10.86861801147461,
854
+ "learning_rate": 2e-05,
855
+ "loss": 6.1523,
856
+ "step": 605
857
+ },
858
+ {
859
+ "epoch": 0.8435609334485739,
860
+ "grad_norm": 10.533904075622559,
861
+ "learning_rate": 2e-05,
862
+ "loss": 6.0844,
863
+ "step": 610
864
+ },
865
+ {
866
+ "epoch": 0.8504753673293,
867
+ "grad_norm": 10.706273078918457,
868
+ "learning_rate": 2e-05,
869
+ "loss": 6.0835,
870
+ "step": 615
871
+ },
872
+ {
873
+ "epoch": 0.857389801210026,
874
+ "grad_norm": 10.777445793151855,
875
+ "learning_rate": 2e-05,
876
+ "loss": 5.9672,
877
+ "step": 620
878
+ },
879
+ {
880
+ "epoch": 0.8643042350907519,
881
+ "grad_norm": 10.988224029541016,
882
+ "learning_rate": 2e-05,
883
+ "loss": 5.9581,
884
+ "step": 625
885
+ },
886
+ {
887
+ "epoch": 0.8712186689714779,
888
+ "grad_norm": 9.580227851867676,
889
+ "learning_rate": 2e-05,
890
+ "loss": 5.9925,
891
+ "step": 630
892
+ },
893
+ {
894
+ "epoch": 0.878133102852204,
895
+ "grad_norm": 10.49671745300293,
896
+ "learning_rate": 2e-05,
897
+ "loss": 6.0107,
898
+ "step": 635
899
+ },
900
+ {
901
+ "epoch": 0.88504753673293,
902
+ "grad_norm": 13.19527816772461,
903
+ "learning_rate": 2e-05,
904
+ "loss": 5.8527,
905
+ "step": 640
906
+ },
907
+ {
908
+ "epoch": 0.891961970613656,
909
+ "grad_norm": 8.78010368347168,
910
+ "learning_rate": 2e-05,
911
+ "loss": 5.8679,
912
+ "step": 645
913
+ },
914
+ {
915
+ "epoch": 0.898876404494382,
916
+ "grad_norm": 11.30097484588623,
917
+ "learning_rate": 2e-05,
918
+ "loss": 5.889,
919
+ "step": 650
920
+ },
921
+ {
922
+ "epoch": 0.905790838375108,
923
+ "grad_norm": 9.741323471069336,
924
+ "learning_rate": 2e-05,
925
+ "loss": 6.0256,
926
+ "step": 655
927
+ },
928
+ {
929
+ "epoch": 0.9127052722558341,
930
+ "grad_norm": 11.919527053833008,
931
+ "learning_rate": 2e-05,
932
+ "loss": 5.9445,
933
+ "step": 660
934
+ },
935
+ {
936
+ "epoch": 0.9196197061365601,
937
+ "grad_norm": 10.810650825500488,
938
+ "learning_rate": 2e-05,
939
+ "loss": 5.9838,
940
+ "step": 665
941
+ },
942
+ {
943
+ "epoch": 0.9265341400172861,
944
+ "grad_norm": 11.397479057312012,
945
+ "learning_rate": 2e-05,
946
+ "loss": 6.0101,
947
+ "step": 670
948
+ },
949
+ {
950
+ "epoch": 0.9334485738980121,
951
+ "grad_norm": 10.634114265441895,
952
+ "learning_rate": 2e-05,
953
+ "loss": 5.9298,
954
+ "step": 675
955
+ },
956
+ {
957
+ "epoch": 0.9403630077787382,
958
+ "grad_norm": 8.226367950439453,
959
+ "learning_rate": 2e-05,
960
+ "loss": 5.8346,
961
+ "step": 680
962
+ },
963
+ {
964
+ "epoch": 0.9472774416594641,
965
+ "grad_norm": 9.403233528137207,
966
+ "learning_rate": 2e-05,
967
+ "loss": 5.9856,
968
+ "step": 685
969
+ },
970
+ {
971
+ "epoch": 0.9541918755401901,
972
+ "grad_norm": 9.8892183303833,
973
+ "learning_rate": 2e-05,
974
+ "loss": 5.9664,
975
+ "step": 690
976
+ },
977
+ {
978
+ "epoch": 0.9611063094209161,
979
+ "grad_norm": 9.223326683044434,
980
+ "learning_rate": 2e-05,
981
+ "loss": 5.9664,
982
+ "step": 695
983
+ },
984
+ {
985
+ "epoch": 0.9680207433016422,
986
+ "grad_norm": 8.897212028503418,
987
+ "learning_rate": 2e-05,
988
+ "loss": 6.1061,
989
+ "step": 700
990
+ },
991
+ {
992
+ "epoch": 0.9749351771823682,
993
+ "grad_norm": 9.927872657775879,
994
+ "learning_rate": 2e-05,
995
+ "loss": 5.8527,
996
+ "step": 705
997
+ },
998
+ {
999
+ "epoch": 0.9818496110630942,
1000
+ "grad_norm": 9.195091247558594,
1001
+ "learning_rate": 2e-05,
1002
+ "loss": 5.9133,
1003
+ "step": 710
1004
+ },
1005
+ {
1006
+ "epoch": 0.9887640449438202,
1007
+ "grad_norm": 11.291016578674316,
1008
+ "learning_rate": 2e-05,
1009
+ "loss": 5.8547,
1010
+ "step": 715
1011
+ },
1012
+ {
1013
+ "epoch": 0.9956784788245462,
1014
+ "grad_norm": 9.991976737976074,
1015
+ "learning_rate": 2e-05,
1016
+ "loss": 5.8738,
1017
+ "step": 720
1018
+ },
1019
+ {
1020
+ "epoch": 1.0025929127052722,
1021
+ "grad_norm": 10.924784660339355,
1022
+ "learning_rate": 2e-05,
1023
+ "loss": 5.8584,
1024
+ "step": 725
1025
+ },
1026
+ {
1027
+ "epoch": 1.0095073465859983,
1028
+ "grad_norm": 10.968101501464844,
1029
+ "learning_rate": 2e-05,
1030
+ "loss": 5.9049,
1031
+ "step": 730
1032
+ },
1033
+ {
1034
+ "epoch": 1.0164217804667244,
1035
+ "grad_norm": 9.537883758544922,
1036
+ "learning_rate": 2e-05,
1037
+ "loss": 6.0092,
1038
+ "step": 735
1039
+ },
1040
+ {
1041
+ "epoch": 1.0233362143474503,
1042
+ "grad_norm": 11.465971946716309,
1043
+ "learning_rate": 2e-05,
1044
+ "loss": 5.9312,
1045
+ "step": 740
1046
+ },
1047
+ {
1048
+ "epoch": 1.0302506482281764,
1049
+ "grad_norm": 9.759550094604492,
1050
+ "learning_rate": 2e-05,
1051
+ "loss": 5.8873,
1052
+ "step": 745
1053
+ },
1054
+ {
1055
+ "epoch": 1.0371650821089022,
1056
+ "grad_norm": 11.28131103515625,
1057
+ "learning_rate": 2e-05,
1058
+ "loss": 5.8255,
1059
+ "step": 750
1060
+ },
1061
+ {
1062
+ "epoch": 1.0440795159896283,
1063
+ "grad_norm": 10.3982572555542,
1064
+ "learning_rate": 2e-05,
1065
+ "loss": 5.8887,
1066
+ "step": 755
1067
+ },
1068
+ {
1069
+ "epoch": 1.0509939498703544,
1070
+ "grad_norm": 9.492704391479492,
1071
+ "learning_rate": 2e-05,
1072
+ "loss": 5.8575,
1073
+ "step": 760
1074
+ },
1075
+ {
1076
+ "epoch": 1.0579083837510803,
1077
+ "grad_norm": 9.550153732299805,
1078
+ "learning_rate": 2e-05,
1079
+ "loss": 5.8067,
1080
+ "step": 765
1081
+ },
1082
+ {
1083
+ "epoch": 1.0648228176318064,
1084
+ "grad_norm": 10.115997314453125,
1085
+ "learning_rate": 2e-05,
1086
+ "loss": 5.9256,
1087
+ "step": 770
1088
+ },
1089
+ {
1090
+ "epoch": 1.0717372515125323,
1091
+ "grad_norm": 8.670522689819336,
1092
+ "learning_rate": 2e-05,
1093
+ "loss": 5.8622,
1094
+ "step": 775
1095
+ },
1096
+ {
1097
+ "epoch": 1.0786516853932584,
1098
+ "grad_norm": 9.876131057739258,
1099
+ "learning_rate": 2e-05,
1100
+ "loss": 5.6528,
1101
+ "step": 780
1102
+ },
1103
+ {
1104
+ "epoch": 1.0855661192739845,
1105
+ "grad_norm": 10.33151626586914,
1106
+ "learning_rate": 2e-05,
1107
+ "loss": 5.8696,
1108
+ "step": 785
1109
+ },
1110
+ {
1111
+ "epoch": 1.0924805531547104,
1112
+ "grad_norm": 11.441190719604492,
1113
+ "learning_rate": 2e-05,
1114
+ "loss": 5.7032,
1115
+ "step": 790
1116
+ },
1117
+ {
1118
+ "epoch": 1.0993949870354365,
1119
+ "grad_norm": 10.569171905517578,
1120
+ "learning_rate": 2e-05,
1121
+ "loss": 5.6969,
1122
+ "step": 795
1123
+ },
1124
+ {
1125
+ "epoch": 1.1063094209161626,
1126
+ "grad_norm": 9.727921485900879,
1127
+ "learning_rate": 2e-05,
1128
+ "loss": 5.799,
1129
+ "step": 800
1130
+ },
1131
+ {
1132
+ "epoch": 1.1132238547968885,
1133
+ "grad_norm": 9.22719955444336,
1134
+ "learning_rate": 2e-05,
1135
+ "loss": 5.8369,
1136
+ "step": 805
1137
+ },
1138
+ {
1139
+ "epoch": 1.1201382886776146,
1140
+ "grad_norm": 10.601619720458984,
1141
+ "learning_rate": 2e-05,
1142
+ "loss": 5.7541,
1143
+ "step": 810
1144
+ },
1145
+ {
1146
+ "epoch": 1.1270527225583407,
1147
+ "grad_norm": 9.599576950073242,
1148
+ "learning_rate": 2e-05,
1149
+ "loss": 5.9023,
1150
+ "step": 815
1151
+ },
1152
+ {
1153
+ "epoch": 1.1339671564390665,
1154
+ "grad_norm": 9.800908088684082,
1155
+ "learning_rate": 2e-05,
1156
+ "loss": 5.864,
1157
+ "step": 820
1158
+ },
1159
+ {
1160
+ "epoch": 1.1408815903197926,
1161
+ "grad_norm": 9.035316467285156,
1162
+ "learning_rate": 2e-05,
1163
+ "loss": 5.8064,
1164
+ "step": 825
1165
+ },
1166
+ {
1167
+ "epoch": 1.1477960242005185,
1168
+ "grad_norm": 10.019914627075195,
1169
+ "learning_rate": 2e-05,
1170
+ "loss": 5.889,
1171
+ "step": 830
1172
+ },
1173
+ {
1174
+ "epoch": 1.1547104580812446,
1175
+ "grad_norm": 9.004077911376953,
1176
+ "learning_rate": 2e-05,
1177
+ "loss": 5.8963,
1178
+ "step": 835
1179
+ },
1180
+ {
1181
+ "epoch": 1.1616248919619707,
1182
+ "grad_norm": 9.362048149108887,
1183
+ "learning_rate": 2e-05,
1184
+ "loss": 5.7001,
1185
+ "step": 840
1186
+ },
1187
+ {
1188
+ "epoch": 1.1685393258426966,
1189
+ "grad_norm": 9.430303573608398,
1190
+ "learning_rate": 2e-05,
1191
+ "loss": 5.791,
1192
+ "step": 845
1193
+ },
1194
+ {
1195
+ "epoch": 1.1754537597234227,
1196
+ "grad_norm": 9.500127792358398,
1197
+ "learning_rate": 2e-05,
1198
+ "loss": 6.0178,
1199
+ "step": 850
1200
+ },
1201
+ {
1202
+ "epoch": 1.1823681936041486,
1203
+ "grad_norm": 9.282292366027832,
1204
+ "learning_rate": 2e-05,
1205
+ "loss": 5.8817,
1206
+ "step": 855
1207
+ },
1208
+ {
1209
+ "epoch": 1.1892826274848747,
1210
+ "grad_norm": 9.203031539916992,
1211
+ "learning_rate": 2e-05,
1212
+ "loss": 5.7435,
1213
+ "step": 860
1214
+ },
1215
+ {
1216
+ "epoch": 1.1961970613656008,
1217
+ "grad_norm": 12.014494895935059,
1218
+ "learning_rate": 2e-05,
1219
+ "loss": 5.8167,
1220
+ "step": 865
1221
+ },
1222
+ {
1223
+ "epoch": 1.2031114952463267,
1224
+ "grad_norm": 10.433515548706055,
1225
+ "learning_rate": 2e-05,
1226
+ "loss": 5.8354,
1227
+ "step": 870
1228
+ },
1229
+ {
1230
+ "epoch": 1.2100259291270528,
1231
+ "grad_norm": 9.104364395141602,
1232
+ "learning_rate": 2e-05,
1233
+ "loss": 5.8387,
1234
+ "step": 875
1235
+ },
1236
+ {
1237
+ "epoch": 1.2169403630077786,
1238
+ "grad_norm": 10.000246047973633,
1239
+ "learning_rate": 2e-05,
1240
+ "loss": 5.7894,
1241
+ "step": 880
1242
+ },
1243
+ {
1244
+ "epoch": 1.2238547968885047,
1245
+ "grad_norm": 9.190520286560059,
1246
+ "learning_rate": 2e-05,
1247
+ "loss": 5.7622,
1248
+ "step": 885
1249
+ },
1250
+ {
1251
+ "epoch": 1.2307692307692308,
1252
+ "grad_norm": 8.65381908416748,
1253
+ "learning_rate": 2e-05,
1254
+ "loss": 5.9024,
1255
+ "step": 890
1256
+ },
1257
+ {
1258
+ "epoch": 1.2376836646499567,
1259
+ "grad_norm": 8.898099899291992,
1260
+ "learning_rate": 2e-05,
1261
+ "loss": 5.7316,
1262
+ "step": 895
1263
+ },
1264
+ {
1265
+ "epoch": 1.2445980985306828,
1266
+ "grad_norm": 9.884634971618652,
1267
+ "learning_rate": 2e-05,
1268
+ "loss": 5.6677,
1269
+ "step": 900
1270
+ },
1271
+ {
1272
+ "epoch": 1.2515125324114087,
1273
+ "grad_norm": 9.322458267211914,
1274
+ "learning_rate": 2e-05,
1275
+ "loss": 5.6476,
1276
+ "step": 905
1277
+ },
1278
+ {
1279
+ "epoch": 1.2584269662921348,
1280
+ "grad_norm": 9.639848709106445,
1281
+ "learning_rate": 2e-05,
1282
+ "loss": 5.7434,
1283
+ "step": 910
1284
+ },
1285
+ {
1286
+ "epoch": 1.265341400172861,
1287
+ "grad_norm": 9.0450439453125,
1288
+ "learning_rate": 2e-05,
1289
+ "loss": 5.6104,
1290
+ "step": 915
1291
+ },
1292
+ {
1293
+ "epoch": 1.272255834053587,
1294
+ "grad_norm": 8.95763111114502,
1295
+ "learning_rate": 2e-05,
1296
+ "loss": 5.7735,
1297
+ "step": 920
1298
+ },
1299
+ {
1300
+ "epoch": 1.2791702679343129,
1301
+ "grad_norm": 8.207048416137695,
1302
+ "learning_rate": 2e-05,
1303
+ "loss": 5.6961,
1304
+ "step": 925
1305
+ },
1306
+ {
1307
+ "epoch": 1.2860847018150388,
1308
+ "grad_norm": 9.897936820983887,
1309
+ "learning_rate": 2e-05,
1310
+ "loss": 5.7379,
1311
+ "step": 930
1312
+ },
1313
+ {
1314
+ "epoch": 1.2929991356957649,
1315
+ "grad_norm": 8.93698501586914,
1316
+ "learning_rate": 2e-05,
1317
+ "loss": 5.8806,
1318
+ "step": 935
1319
+ },
1320
+ {
1321
+ "epoch": 1.299913569576491,
1322
+ "grad_norm": 9.129692077636719,
1323
+ "learning_rate": 2e-05,
1324
+ "loss": 5.829,
1325
+ "step": 940
1326
+ },
1327
+ {
1328
+ "epoch": 1.306828003457217,
1329
+ "grad_norm": 8.706141471862793,
1330
+ "learning_rate": 2e-05,
1331
+ "loss": 5.4444,
1332
+ "step": 945
1333
+ },
1334
+ {
1335
+ "epoch": 1.313742437337943,
1336
+ "grad_norm": 10.378802299499512,
1337
+ "learning_rate": 2e-05,
1338
+ "loss": 5.5419,
1339
+ "step": 950
1340
+ },
1341
+ {
1342
+ "epoch": 1.320656871218669,
1343
+ "grad_norm": 8.69825267791748,
1344
+ "learning_rate": 2e-05,
1345
+ "loss": 5.8194,
1346
+ "step": 955
1347
+ },
1348
+ {
1349
+ "epoch": 1.327571305099395,
1350
+ "grad_norm": 9.523035049438477,
1351
+ "learning_rate": 2e-05,
1352
+ "loss": 5.8134,
1353
+ "step": 960
1354
+ },
1355
+ {
1356
+ "epoch": 1.334485738980121,
1357
+ "grad_norm": 9.302953720092773,
1358
+ "learning_rate": 2e-05,
1359
+ "loss": 5.7842,
1360
+ "step": 965
1361
+ },
1362
+ {
1363
+ "epoch": 1.3414001728608471,
1364
+ "grad_norm": 9.545923233032227,
1365
+ "learning_rate": 2e-05,
1366
+ "loss": 5.6555,
1367
+ "step": 970
1368
+ },
1369
+ {
1370
+ "epoch": 1.348314606741573,
1371
+ "grad_norm": 8.136236190795898,
1372
+ "learning_rate": 2e-05,
1373
+ "loss": 5.6523,
1374
+ "step": 975
1375
+ },
1376
+ {
1377
+ "epoch": 1.355229040622299,
1378
+ "grad_norm": 9.082107543945312,
1379
+ "learning_rate": 2e-05,
1380
+ "loss": 5.7012,
1381
+ "step": 980
1382
+ },
1383
+ {
1384
+ "epoch": 1.362143474503025,
1385
+ "grad_norm": 9.781044006347656,
1386
+ "learning_rate": 2e-05,
1387
+ "loss": 5.771,
1388
+ "step": 985
1389
+ },
1390
+ {
1391
+ "epoch": 1.369057908383751,
1392
+ "grad_norm": 8.306391716003418,
1393
+ "learning_rate": 2e-05,
1394
+ "loss": 5.7315,
1395
+ "step": 990
1396
+ },
1397
+ {
1398
+ "epoch": 1.3759723422644772,
1399
+ "grad_norm": 9.246503829956055,
1400
+ "learning_rate": 2e-05,
1401
+ "loss": 5.6642,
1402
+ "step": 995
1403
+ },
1404
+ {
1405
+ "epoch": 1.382886776145203,
1406
+ "grad_norm": 9.134090423583984,
1407
+ "learning_rate": 2e-05,
1408
+ "loss": 5.7883,
1409
+ "step": 1000
1410
+ },
1411
+ {
1412
+ "epoch": 1.3898012100259292,
1413
+ "grad_norm": 9.531157493591309,
1414
+ "learning_rate": 2e-05,
1415
+ "loss": 5.6292,
1416
+ "step": 1005
1417
+ },
1418
+ {
1419
+ "epoch": 1.396715643906655,
1420
+ "grad_norm": 8.725780487060547,
1421
+ "learning_rate": 2e-05,
1422
+ "loss": 5.7954,
1423
+ "step": 1010
1424
+ },
1425
+ {
1426
+ "epoch": 1.4036300777873811,
1427
+ "grad_norm": 10.372122764587402,
1428
+ "learning_rate": 2e-05,
1429
+ "loss": 5.6684,
1430
+ "step": 1015
1431
+ },
1432
+ {
1433
+ "epoch": 1.4105445116681072,
1434
+ "grad_norm": 9.697192192077637,
1435
+ "learning_rate": 2e-05,
1436
+ "loss": 5.6207,
1437
+ "step": 1020
1438
+ },
1439
+ {
1440
+ "epoch": 1.4174589455488331,
1441
+ "grad_norm": 8.608368873596191,
1442
+ "learning_rate": 2e-05,
1443
+ "loss": 5.7861,
1444
+ "step": 1025
1445
+ },
1446
+ {
1447
+ "epoch": 1.4243733794295592,
1448
+ "grad_norm": 9.998618125915527,
1449
+ "learning_rate": 2e-05,
1450
+ "loss": 5.7614,
1451
+ "step": 1030
1452
+ },
1453
+ {
1454
+ "epoch": 1.431287813310285,
1455
+ "grad_norm": 8.049917221069336,
1456
+ "learning_rate": 2e-05,
1457
+ "loss": 5.8703,
1458
+ "step": 1035
1459
+ },
1460
+ {
1461
+ "epoch": 1.4382022471910112,
1462
+ "grad_norm": 8.179950714111328,
1463
+ "learning_rate": 2e-05,
1464
+ "loss": 5.6272,
1465
+ "step": 1040
1466
+ },
1467
+ {
1468
+ "epoch": 1.4451166810717373,
1469
+ "grad_norm": 10.099967002868652,
1470
+ "learning_rate": 2e-05,
1471
+ "loss": 5.7138,
1472
+ "step": 1045
1473
+ },
1474
+ {
1475
+ "epoch": 1.4520311149524634,
1476
+ "grad_norm": 10.001805305480957,
1477
+ "learning_rate": 2e-05,
1478
+ "loss": 5.626,
1479
+ "step": 1050
1480
+ },
1481
+ {
1482
+ "epoch": 1.4589455488331893,
1483
+ "grad_norm": 9.001076698303223,
1484
+ "learning_rate": 2e-05,
1485
+ "loss": 5.5744,
1486
+ "step": 1055
1487
+ },
1488
+ {
1489
+ "epoch": 1.4658599827139154,
1490
+ "grad_norm": 9.802993774414062,
1491
+ "learning_rate": 2e-05,
1492
+ "loss": 5.5668,
1493
+ "step": 1060
1494
+ },
1495
+ {
1496
+ "epoch": 1.4727744165946413,
1497
+ "grad_norm": 10.538679122924805,
1498
+ "learning_rate": 2e-05,
1499
+ "loss": 5.7665,
1500
+ "step": 1065
1501
+ },
1502
+ {
1503
+ "epoch": 1.4796888504753674,
1504
+ "grad_norm": 9.082561492919922,
1505
+ "learning_rate": 2e-05,
1506
+ "loss": 5.6423,
1507
+ "step": 1070
1508
+ },
1509
+ {
1510
+ "epoch": 1.4866032843560935,
1511
+ "grad_norm": 8.30572509765625,
1512
+ "learning_rate": 2e-05,
1513
+ "loss": 5.6803,
1514
+ "step": 1075
1515
+ },
1516
+ {
1517
+ "epoch": 1.4935177182368193,
1518
+ "grad_norm": 9.155613899230957,
1519
+ "learning_rate": 2e-05,
1520
+ "loss": 5.5482,
1521
+ "step": 1080
1522
+ },
1523
+ {
1524
+ "epoch": 1.5004321521175452,
1525
+ "grad_norm": 8.145245552062988,
1526
+ "learning_rate": 2e-05,
1527
+ "loss": 5.5116,
1528
+ "step": 1085
1529
+ },
1530
+ {
1531
+ "epoch": 1.5073465859982713,
1532
+ "grad_norm": 9.245566368103027,
1533
+ "learning_rate": 2e-05,
1534
+ "loss": 5.8766,
1535
+ "step": 1090
1536
+ },
1537
+ {
1538
+ "epoch": 1.5142610198789974,
1539
+ "grad_norm": 9.05094051361084,
1540
+ "learning_rate": 2e-05,
1541
+ "loss": 5.7613,
1542
+ "step": 1095
1543
+ },
1544
+ {
1545
+ "epoch": 1.5211754537597235,
1546
+ "grad_norm": 10.022308349609375,
1547
+ "learning_rate": 2e-05,
1548
+ "loss": 5.6245,
1549
+ "step": 1100
1550
+ },
1551
+ {
1552
+ "epoch": 1.5280898876404494,
1553
+ "grad_norm": 7.789440631866455,
1554
+ "learning_rate": 2e-05,
1555
+ "loss": 5.6924,
1556
+ "step": 1105
1557
+ },
1558
+ {
1559
+ "epoch": 1.5350043215211755,
1560
+ "grad_norm": 8.748954772949219,
1561
+ "learning_rate": 2e-05,
1562
+ "loss": 5.481,
1563
+ "step": 1110
1564
+ },
1565
+ {
1566
+ "epoch": 1.5419187554019014,
1567
+ "grad_norm": 10.177347183227539,
1568
+ "learning_rate": 2e-05,
1569
+ "loss": 5.6785,
1570
+ "step": 1115
1571
+ },
1572
+ {
1573
+ "epoch": 1.5488331892826275,
1574
+ "grad_norm": 10.402741432189941,
1575
+ "learning_rate": 2e-05,
1576
+ "loss": 5.6013,
1577
+ "step": 1120
1578
+ },
1579
+ {
1580
+ "epoch": 1.5557476231633536,
1581
+ "grad_norm": 8.850756645202637,
1582
+ "learning_rate": 2e-05,
1583
+ "loss": 5.657,
1584
+ "step": 1125
1585
+ },
1586
+ {
1587
+ "epoch": 1.5626620570440797,
1588
+ "grad_norm": 7.942482948303223,
1589
+ "learning_rate": 2e-05,
1590
+ "loss": 5.6496,
1591
+ "step": 1130
1592
+ },
1593
+ {
1594
+ "epoch": 1.5695764909248056,
1595
+ "grad_norm": 8.457247734069824,
1596
+ "learning_rate": 2e-05,
1597
+ "loss": 5.6394,
1598
+ "step": 1135
1599
+ },
1600
+ {
1601
+ "epoch": 1.5764909248055314,
1602
+ "grad_norm": 8.8961181640625,
1603
+ "learning_rate": 2e-05,
1604
+ "loss": 5.7258,
1605
+ "step": 1140
1606
+ },
1607
+ {
1608
+ "epoch": 1.5834053586862575,
1609
+ "grad_norm": 9.401551246643066,
1610
+ "learning_rate": 2e-05,
1611
+ "loss": 5.4877,
1612
+ "step": 1145
1613
+ },
1614
+ {
1615
+ "epoch": 1.5903197925669836,
1616
+ "grad_norm": 8.802103996276855,
1617
+ "learning_rate": 2e-05,
1618
+ "loss": 5.6192,
1619
+ "step": 1150
1620
+ },
1621
+ {
1622
+ "epoch": 1.5972342264477097,
1623
+ "grad_norm": 8.03233528137207,
1624
+ "learning_rate": 2e-05,
1625
+ "loss": 5.5979,
1626
+ "step": 1155
1627
+ },
1628
+ {
1629
+ "epoch": 1.6041486603284356,
1630
+ "grad_norm": 7.991047382354736,
1631
+ "learning_rate": 2e-05,
1632
+ "loss": 5.6558,
1633
+ "step": 1160
1634
+ },
1635
+ {
1636
+ "epoch": 1.6110630942091615,
1637
+ "grad_norm": 7.965709686279297,
1638
+ "learning_rate": 2e-05,
1639
+ "loss": 5.6512,
1640
+ "step": 1165
1641
+ },
1642
+ {
1643
+ "epoch": 1.6179775280898876,
1644
+ "grad_norm": 8.714449882507324,
1645
+ "learning_rate": 2e-05,
1646
+ "loss": 5.6624,
1647
+ "step": 1170
1648
+ },
1649
+ {
1650
+ "epoch": 1.6248919619706137,
1651
+ "grad_norm": 8.108067512512207,
1652
+ "learning_rate": 2e-05,
1653
+ "loss": 5.5561,
1654
+ "step": 1175
1655
+ },
1656
+ {
1657
+ "epoch": 1.6318063958513398,
1658
+ "grad_norm": 8.9557523727417,
1659
+ "learning_rate": 2e-05,
1660
+ "loss": 5.6994,
1661
+ "step": 1180
1662
+ },
1663
+ {
1664
+ "epoch": 1.6387208297320657,
1665
+ "grad_norm": 8.757265090942383,
1666
+ "learning_rate": 2e-05,
1667
+ "loss": 5.7006,
1668
+ "step": 1185
1669
+ },
1670
+ {
1671
+ "epoch": 1.6456352636127916,
1672
+ "grad_norm": 9.390371322631836,
1673
+ "learning_rate": 2e-05,
1674
+ "loss": 5.6128,
1675
+ "step": 1190
1676
+ },
1677
+ {
1678
+ "epoch": 1.6525496974935177,
1679
+ "grad_norm": 10.731466293334961,
1680
+ "learning_rate": 2e-05,
1681
+ "loss": 5.607,
1682
+ "step": 1195
1683
+ },
1684
+ {
1685
+ "epoch": 1.6594641313742438,
1686
+ "grad_norm": 7.870199203491211,
1687
+ "learning_rate": 2e-05,
1688
+ "loss": 5.5616,
1689
+ "step": 1200
1690
+ },
1691
+ {
1692
+ "epoch": 1.6663785652549699,
1693
+ "grad_norm": 8.55478286743164,
1694
+ "learning_rate": 2e-05,
1695
+ "loss": 5.4787,
1696
+ "step": 1205
1697
+ },
1698
+ {
1699
+ "epoch": 1.6732929991356957,
1700
+ "grad_norm": 8.947685241699219,
1701
+ "learning_rate": 2e-05,
1702
+ "loss": 5.5405,
1703
+ "step": 1210
1704
+ },
1705
+ {
1706
+ "epoch": 1.6802074330164216,
1707
+ "grad_norm": 8.698450088500977,
1708
+ "learning_rate": 2e-05,
1709
+ "loss": 5.6053,
1710
+ "step": 1215
1711
+ },
1712
+ {
1713
+ "epoch": 1.6871218668971477,
1714
+ "grad_norm": 8.03096866607666,
1715
+ "learning_rate": 2e-05,
1716
+ "loss": 5.6641,
1717
+ "step": 1220
1718
+ },
1719
+ {
1720
+ "epoch": 1.6940363007778738,
1721
+ "grad_norm": 7.73576545715332,
1722
+ "learning_rate": 2e-05,
1723
+ "loss": 5.6051,
1724
+ "step": 1225
1725
+ },
1726
+ {
1727
+ "epoch": 1.7009507346586,
1728
+ "grad_norm": 8.361144065856934,
1729
+ "learning_rate": 2e-05,
1730
+ "loss": 5.5136,
1731
+ "step": 1230
1732
+ },
1733
+ {
1734
+ "epoch": 1.7078651685393258,
1735
+ "grad_norm": 8.468069076538086,
1736
+ "learning_rate": 2e-05,
1737
+ "loss": 5.6641,
1738
+ "step": 1235
1739
+ },
1740
+ {
1741
+ "epoch": 1.714779602420052,
1742
+ "grad_norm": 8.493988990783691,
1743
+ "learning_rate": 2e-05,
1744
+ "loss": 5.8225,
1745
+ "step": 1240
1746
+ },
1747
+ {
1748
+ "epoch": 1.7216940363007778,
1749
+ "grad_norm": 9.670719146728516,
1750
+ "learning_rate": 2e-05,
1751
+ "loss": 5.6226,
1752
+ "step": 1245
1753
+ },
1754
+ {
1755
+ "epoch": 1.7286084701815039,
1756
+ "grad_norm": 8.527884483337402,
1757
+ "learning_rate": 2e-05,
1758
+ "loss": 5.4408,
1759
+ "step": 1250
1760
+ },
1761
+ {
1762
+ "epoch": 1.73552290406223,
1763
+ "grad_norm": 9.163163185119629,
1764
+ "learning_rate": 2e-05,
1765
+ "loss": 5.6573,
1766
+ "step": 1255
1767
+ },
1768
+ {
1769
+ "epoch": 1.742437337942956,
1770
+ "grad_norm": 8.136192321777344,
1771
+ "learning_rate": 2e-05,
1772
+ "loss": 5.7071,
1773
+ "step": 1260
1774
+ },
1775
+ {
1776
+ "epoch": 1.749351771823682,
1777
+ "grad_norm": 8.329581260681152,
1778
+ "learning_rate": 2e-05,
1779
+ "loss": 5.6753,
1780
+ "step": 1265
1781
+ },
1782
+ {
1783
+ "epoch": 1.7562662057044078,
1784
+ "grad_norm": 8.732542037963867,
1785
+ "learning_rate": 2e-05,
1786
+ "loss": 5.883,
1787
+ "step": 1270
1788
+ },
1789
+ {
1790
+ "epoch": 1.763180639585134,
1791
+ "grad_norm": 7.646657466888428,
1792
+ "learning_rate": 2e-05,
1793
+ "loss": 5.5841,
1794
+ "step": 1275
1795
+ },
1796
+ {
1797
+ "epoch": 1.77009507346586,
1798
+ "grad_norm": 8.392094612121582,
1799
+ "learning_rate": 2e-05,
1800
+ "loss": 5.7013,
1801
+ "step": 1280
1802
+ },
1803
+ {
1804
+ "epoch": 1.7770095073465861,
1805
+ "grad_norm": 8.878968238830566,
1806
+ "learning_rate": 2e-05,
1807
+ "loss": 5.6175,
1808
+ "step": 1285
1809
+ },
1810
+ {
1811
+ "epoch": 1.783923941227312,
1812
+ "grad_norm": 8.324740409851074,
1813
+ "learning_rate": 2e-05,
1814
+ "loss": 5.6653,
1815
+ "step": 1290
1816
+ },
1817
+ {
1818
+ "epoch": 1.790838375108038,
1819
+ "grad_norm": 8.578971862792969,
1820
+ "learning_rate": 2e-05,
1821
+ "loss": 5.6605,
1822
+ "step": 1295
1823
+ },
1824
+ {
1825
+ "epoch": 1.797752808988764,
1826
+ "grad_norm": 7.883670330047607,
1827
+ "learning_rate": 2e-05,
1828
+ "loss": 5.6848,
1829
+ "step": 1300
1830
+ },
1831
+ {
1832
+ "epoch": 1.80466724286949,
1833
+ "grad_norm": 8.626081466674805,
1834
+ "learning_rate": 2e-05,
1835
+ "loss": 5.66,
1836
+ "step": 1305
1837
+ },
1838
+ {
1839
+ "epoch": 1.8115816767502162,
1840
+ "grad_norm": 9.518331527709961,
1841
+ "learning_rate": 2e-05,
1842
+ "loss": 5.5747,
1843
+ "step": 1310
1844
+ },
1845
+ {
1846
+ "epoch": 1.818496110630942,
1847
+ "grad_norm": 8.553828239440918,
1848
+ "learning_rate": 2e-05,
1849
+ "loss": 5.5066,
1850
+ "step": 1315
1851
+ },
1852
+ {
1853
+ "epoch": 1.825410544511668,
1854
+ "grad_norm": 9.588334083557129,
1855
+ "learning_rate": 2e-05,
1856
+ "loss": 5.5819,
1857
+ "step": 1320
1858
+ },
1859
+ {
1860
+ "epoch": 1.832324978392394,
1861
+ "grad_norm": 8.089001655578613,
1862
+ "learning_rate": 2e-05,
1863
+ "loss": 5.5675,
1864
+ "step": 1325
1865
+ },
1866
+ {
1867
+ "epoch": 1.8392394122731202,
1868
+ "grad_norm": 7.650081157684326,
1869
+ "learning_rate": 2e-05,
1870
+ "loss": 5.6408,
1871
+ "step": 1330
1872
+ },
1873
+ {
1874
+ "epoch": 1.8461538461538463,
1875
+ "grad_norm": 8.453227996826172,
1876
+ "learning_rate": 2e-05,
1877
+ "loss": 5.8178,
1878
+ "step": 1335
1879
+ },
1880
+ {
1881
+ "epoch": 1.8530682800345721,
1882
+ "grad_norm": 8.31882095336914,
1883
+ "learning_rate": 2e-05,
1884
+ "loss": 5.5703,
1885
+ "step": 1340
1886
+ },
1887
+ {
1888
+ "epoch": 1.8599827139152982,
1889
+ "grad_norm": 7.963268280029297,
1890
+ "learning_rate": 2e-05,
1891
+ "loss": 5.5613,
1892
+ "step": 1345
1893
+ },
1894
+ {
1895
+ "epoch": 1.8668971477960241,
1896
+ "grad_norm": 8.917067527770996,
1897
+ "learning_rate": 2e-05,
1898
+ "loss": 5.6031,
1899
+ "step": 1350
1900
+ },
1901
+ {
1902
+ "epoch": 1.8738115816767502,
1903
+ "grad_norm": 8.964554786682129,
1904
+ "learning_rate": 2e-05,
1905
+ "loss": 5.5979,
1906
+ "step": 1355
1907
+ },
1908
+ {
1909
+ "epoch": 1.8807260155574763,
1910
+ "grad_norm": 8.522499084472656,
1911
+ "learning_rate": 2e-05,
1912
+ "loss": 5.5685,
1913
+ "step": 1360
1914
+ },
1915
+ {
1916
+ "epoch": 1.8876404494382022,
1917
+ "grad_norm": 8.817831039428711,
1918
+ "learning_rate": 2e-05,
1919
+ "loss": 5.5745,
1920
+ "step": 1365
1921
+ },
1922
+ {
1923
+ "epoch": 1.8945548833189283,
1924
+ "grad_norm": 7.633350372314453,
1925
+ "learning_rate": 2e-05,
1926
+ "loss": 5.7164,
1927
+ "step": 1370
1928
+ },
1929
+ {
1930
+ "epoch": 1.9014693171996542,
1931
+ "grad_norm": 8.85508918762207,
1932
+ "learning_rate": 2e-05,
1933
+ "loss": 5.514,
1934
+ "step": 1375
1935
+ },
1936
+ {
1937
+ "epoch": 1.9083837510803803,
1938
+ "grad_norm": 7.352026462554932,
1939
+ "learning_rate": 2e-05,
1940
+ "loss": 5.6187,
1941
+ "step": 1380
1942
+ },
1943
+ {
1944
+ "epoch": 1.9152981849611064,
1945
+ "grad_norm": 8.067057609558105,
1946
+ "learning_rate": 2e-05,
1947
+ "loss": 5.6516,
1948
+ "step": 1385
1949
+ },
1950
+ {
1951
+ "epoch": 1.9222126188418325,
1952
+ "grad_norm": 8.725987434387207,
1953
+ "learning_rate": 2e-05,
1954
+ "loss": 5.6543,
1955
+ "step": 1390
1956
+ },
1957
+ {
1958
+ "epoch": 1.9291270527225584,
1959
+ "grad_norm": 7.8433003425598145,
1960
+ "learning_rate": 2e-05,
1961
+ "loss": 5.6603,
1962
+ "step": 1395
1963
+ },
1964
+ {
1965
+ "epoch": 1.9360414866032842,
1966
+ "grad_norm": 8.05147647857666,
1967
+ "learning_rate": 2e-05,
1968
+ "loss": 5.521,
1969
+ "step": 1400
1970
+ },
1971
+ {
1972
+ "epoch": 1.9429559204840103,
1973
+ "grad_norm": 7.805672645568848,
1974
+ "learning_rate": 2e-05,
1975
+ "loss": 5.5867,
1976
+ "step": 1405
1977
+ },
1978
+ {
1979
+ "epoch": 1.9498703543647364,
1980
+ "grad_norm": 7.390529632568359,
1981
+ "learning_rate": 2e-05,
1982
+ "loss": 5.5616,
1983
+ "step": 1410
1984
+ },
1985
+ {
1986
+ "epoch": 1.9567847882454625,
1987
+ "grad_norm": 8.674482345581055,
1988
+ "learning_rate": 2e-05,
1989
+ "loss": 5.6737,
1990
+ "step": 1415
1991
+ },
1992
+ {
1993
+ "epoch": 1.9636992221261884,
1994
+ "grad_norm": 7.816572189331055,
1995
+ "learning_rate": 2e-05,
1996
+ "loss": 5.4654,
1997
+ "step": 1420
1998
+ },
1999
+ {
2000
+ "epoch": 1.9706136560069143,
2001
+ "grad_norm": 8.28125286102295,
2002
+ "learning_rate": 2e-05,
2003
+ "loss": 5.6771,
2004
+ "step": 1425
2005
+ },
2006
+ {
2007
+ "epoch": 1.9775280898876404,
2008
+ "grad_norm": 8.431089401245117,
2009
+ "learning_rate": 2e-05,
2010
+ "loss": 5.5056,
2011
+ "step": 1430
2012
+ },
2013
+ {
2014
+ "epoch": 1.9844425237683665,
2015
+ "grad_norm": 8.352622032165527,
2016
+ "learning_rate": 2e-05,
2017
+ "loss": 5.4347,
2018
+ "step": 1435
2019
+ },
2020
+ {
2021
+ "epoch": 1.9913569576490926,
2022
+ "grad_norm": 8.090385437011719,
2023
+ "learning_rate": 2e-05,
2024
+ "loss": 5.5569,
2025
+ "step": 1440
2026
+ },
2027
+ {
2028
+ "epoch": 1.9982713915298185,
2029
+ "grad_norm": 7.950430393218994,
2030
+ "learning_rate": 2e-05,
2031
+ "loss": 5.6047,
2032
+ "step": 1445
2033
+ }
2034
+ ],
2035
+ "logging_steps": 5,
2036
+ "max_steps": 2169,
2037
+ "num_input_tokens_seen": 0,
2038
+ "num_train_epochs": 3,
2039
+ "save_steps": 500,
2040
+ "stateful_callbacks": {
2041
+ "TrainerControl": {
2042
+ "args": {
2043
+ "should_epoch_stop": false,
2044
+ "should_evaluate": false,
2045
+ "should_log": false,
2046
+ "should_save": true,
2047
+ "should_training_stop": false
2048
+ },
2049
+ "attributes": {}
2050
+ }
2051
+ },
2052
+ "total_flos": 1.4890849106275922e+18,
2053
+ "train_batch_size": 8,
2054
+ "trial_name": null,
2055
+ "trial_params": null
2056
+ }
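
The `trainer_state.json` above logs loss every 5 steps (`"logging_steps": 5`) at a constant learning rate of 2e-05, with training loss drifting from roughly 6.3 near step 435 down to about 5.6 by step 1445 (epoch ≈ 2.0). A minimal sketch for inspecting the curve, assuming the repo is cloned locally; the path and variable names are illustrative:

```python
import json

# Illustrative path, assuming the repo is cloned locally.
STATE = "spaceqwen2-7b-instruct/checkpoint-1446/trainer_state.json"

with open(STATE) as f:
    state = json.load(f)

# Each logged entry carries epoch, grad_norm, learning_rate, loss, and step,
# matching the records in the diff above.
losses = [(h["step"], h["loss"]) for h in state["log_history"] if "loss" in h]

print(f"logged points: {len(losses)}")
print(f"first: step {losses[0][0]}, loss {losses[0][1]:.4f}")
print(f"last:  step {losses[-1][0]}, loss {losses[-1][1]:.4f}")

# Smooth the noisy per-5-step losses with a simple trailing average.
window = 10
if len(losses) >= window:
    tail = [loss for _, loss in losses[-window:]]
    print(f"mean of final {window} points: {sum(tail) / window:.4f}")
```
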
spaceqwen2-7b-instruct/checkpoint-1446/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5f930008ae8c5a8d1f1fc80733f1d809377ce163e2c0e7a2c4585c84c969ba9
3
+ size 5560
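
`training_args.bin` is stored as a Git LFS pointer (the `version`/`oid`/`size` lines above are the pointer contents, not the file itself). Once the real blob is fetched, it deserializes to the `TrainingArguments` object used for this run. A hedged sketch, assuming `torch` and a compatible `transformers` are installed, since the file is a pickled Python object:

```python
import torch

# Fetch the real 5,560-byte blob first (e.g. `git lfs pull` in the clone);
# loading the raw pointer file would fail.
args = torch.load(
    "spaceqwen2-7b-instruct/checkpoint-1446/training_args.bin",
    weights_only=False,  # TrainingArguments is pickled, not a pure tensor file
)
print(args.learning_rate, args.per_device_train_batch_size, args.num_train_epochs)
```
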
spaceqwen2-7b-instruct/checkpoint-1446/vocab.json ADDED
The diff for this file is too large to render. See raw diff
spaceqwen2-7b-instruct/checkpoint-2169/README.md ADDED
@@ -0,0 +1,202 @@
1
+ ---
2
+ base_model: Qwen/Qwen2-VL-7B-Instruct
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.13.2
spaceqwen2-7b-instruct/checkpoint-2169/adapter_config.json ADDED
@@ -0,0 +1,36 @@
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Qwen/Qwen2-VL-7B-Instruct",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 256,
14
+ "lora_dropout": 0.05,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 128,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "c_fc",
24
+ "out_proj",
25
+ "c_attn",
26
+ "v_proj",
27
+ "w1",
28
+ "attn.c_proj",
29
+ "q_proj",
30
+ "in_proj",
31
+ "w2"
32
+ ],
33
+ "task_type": "CAUSAL_LM",
34
+ "use_dora": false,
35
+ "use_rslora": false
36
+ }
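
This adapter config sets `r: 128` with `lora_alpha: 256`, so updates from the adapter enter each target module at a scale of `lora_alpha / r = 2.0`, with 5% dropout on the LoRA path. A conceptual sketch of the standard LoRA forward under these numbers (illustrative only, not PEFT's actual implementation):

```python
import torch.nn as nn

class LoRALinearSketch(nn.Module):
    """Standard LoRA forward: y = W x + (alpha / r) * B(A(dropout(x)))."""

    def __init__(self, base: nn.Linear, r: int = 128, alpha: int = 256,
                 dropout: float = 0.05):
        super().__init__()
        self.base = base  # frozen pretrained projection
        self.lora_A = nn.Linear(base.in_features, r, bias=False)
        self.lora_B = nn.Linear(r, base.out_features, bias=False)
        nn.init.zeros_(self.lora_B.weight)  # adapter starts as a no-op
        self.dropout = nn.Dropout(dropout)
        self.scaling = alpha / r  # 256 / 128 = 2.0 for this config

    def forward(self, x):
        return self.base(x) + self.scaling * self.lora_B(
            self.lora_A(self.dropout(x)))
```

With `init_lora_weights: true`, PEFT initializes `A` randomly and `B` to zeros, so training begins exactly at the base model's behavior.
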
spaceqwen2-7b-instruct/checkpoint-2169/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ea4fcb7262d44ebd7acc0b0623fe2bcb94f40bcb5b5ecbdc8859b3c8f1b4951
3
+ size 161495976
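
The ~161 MB safetensors file above contains only the LoRA weights; at inference time they are attached to the `Qwen/Qwen2-VL-7B-Instruct` base named in `adapter_config.json`. A minimal loading sketch, assuming a `transformers` version with Qwen2-VL support and `peft` ~0.13 (the version the README lists); the local paths are illustrative:

```python
from peft import PeftModel
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

BASE = "Qwen/Qwen2-VL-7B-Instruct"                  # from adapter_config.json
ADAPTER = "spaceqwen2-7b-instruct/checkpoint-2169"  # local checkpoint directory

processor = AutoProcessor.from_pretrained(BASE)
base_model = Qwen2VLForConditionalGeneration.from_pretrained(
    BASE, torch_dtype="auto", device_map="auto"
)
model = PeftModel.from_pretrained(base_model, ADAPTER)
# Optionally fold the adapter into the base weights for faster inference:
# model = model.merge_and_unload()
```
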
spaceqwen2-7b-instruct/checkpoint-2169/added_tokens.json ADDED
@@ -0,0 +1,16 @@
1
+ {
2
+ "<|box_end|>": 151649,
3
+ "<|box_start|>": 151648,
4
+ "<|endoftext|>": 151643,
5
+ "<|im_end|>": 151645,
6
+ "<|im_start|>": 151644,
7
+ "<|image_pad|>": 151655,
8
+ "<|object_ref_end|>": 151647,
9
+ "<|object_ref_start|>": 151646,
10
+ "<|quad_end|>": 151651,
11
+ "<|quad_start|>": 151650,
12
+ "<|video_pad|>": 151656,
13
+ "<|vision_end|>": 151653,
14
+ "<|vision_pad|>": 151654,
15
+ "<|vision_start|>": 151652
16
+ }
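
The `added_tokens.json` above mirrors the vision and control tokens the Qwen2-VL tokenizer already defines (ids 151643–151656), so the fine-tune adds no new vocabulary. A quick consistency check, assuming the base tokenizer can be downloaded:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
for token, expected_id in [("<|image_pad|>", 151655), ("<|vision_start|>", 151652)]:
    assert tok.convert_tokens_to_ids(token) == expected_id
print("special-token ids match added_tokens.json")
```
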
spaceqwen2-7b-instruct/checkpoint-2169/merges.txt ADDED
The diff for this file is too large to render. See raw diff