Emna102 committed on
Commit
43499df
1 Parent(s): 2b5669a

End of training

Browse files
README.md CHANGED
@@ -12,11 +12,12 @@ model-index:
12
  should probably proofread and complete it, then remove this comment. -->
13
 
14
  [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/emnamaghrebi-epfl/idefics2-8B-ft-dataset/runs/i6dz9zuf)
 
15
  # idefics2-8b-manuals-ft-v4
16
 
17
  This model is a fine-tuned version of [HuggingFaceM4/idefics2-8b](https://huggingface.co/HuggingFaceM4/idefics2-8b) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.1312
20
 
21
  ## Model description
22
 
@@ -51,9 +52,9 @@ The following hyperparameters were used during training:
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:------:|:----:|:---------------:|
54
- | 8.0773 | 1.1299 | 25 | 0.6877 |
55
- | 0.3475 | 2.2599 | 50 | 0.1887 |
56
- | 0.1647 | 3.3898 | 75 | 0.1312 |
57
 
58
 
59
  ### Framework versions
 
12
  should probably proofread and complete it, then remove this comment. -->
13
 
14
  [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/emnamaghrebi-epfl/idefics2-8B-ft-dataset/runs/i6dz9zuf)
15
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/emnamaghrebi-epfl/idefics2-8B-ft-dataset/runs/sciuimwo)
16
  # idefics2-8b-manuals-ft-v4
17
 
18
  This model is a fine-tuned version of [HuggingFaceM4/idefics2-8b](https://huggingface.co/HuggingFaceM4/idefics2-8b) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 5.7217
21
 
22
  ## Model description
23
 
 
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:------:|:----:|:---------------:|
55
+ | 15.1103 | 1.1299 | 25 | 14.9539 |
56
+ | 13.8953 | 2.2599 | 50 | 12.0379 |
57
+ | 8.7424 | 3.3898 | 75 | 5.7217 |
58
 
59
 
60
  ### Framework versions
adapter_config.json CHANGED
@@ -10,7 +10,7 @@
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
- "lora_alpha": 32,
14
  "lora_dropout": 0.1,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
@@ -19,7 +19,7 @@
19
  "r": 64,
20
  "rank_pattern": {},
21
  "revision": null,
22
- "target_modules": ".*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$",
23
  "task_type": null,
24
  "use_dora": false,
25
  "use_rslora": false
 
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
+ "lora_alpha": 64,
14
  "lora_dropout": 0.1,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
 
19
  "r": 64,
20
  "rank_pattern": {},
21
  "revision": null,
22
+ "target_modules": ".*(modality_projection|perceiver_resampler).*(k_proj|q_proj|v_proj).*$",
23
  "task_type": null,
24
  "use_dora": false,
25
  "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fb7f2057c99555a51fe5cd8c567e2074fcdafa2abd5729efee6a32593ceb6ca
3
- size 746528304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a9d5c0b60938cc7f723069477b9643af616c7d986d8a6d03123494906a3fe13
3
+ size 11209608
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a401f4ac56bb712a238b846f3e8de3ababd9a13d6643b0c95762adbcbb5ef94
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:851879ab1b20cd37c91ab4c6b87202529d9a58dacae5ac3d034412afedc526e0
3
  size 5240