sofyc committed on
Commit
1512030
1 Parent(s): d8e4519
README.md CHANGED
@@ -1,5 +1,7 @@
1
  ---
2
  base_model: microsoft/Phi-3.5-mini-instruct
 
 
3
  library_name: peft
4
  license: mit
5
  tags:
@@ -16,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # Phi-3.5-MultiCap-tool-embedding-past
18
 
19
- This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 1.1321
22
 
23
  ## Model description
24
 
@@ -52,14 +54,19 @@ The following hyperparameters were used during training:
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:------:|:----:|:---------------:|
55
- | 1.132 | 0.2256 | 50 | 1.1642 |
56
- | 1.1368 | 0.4512 | 100 | 1.1333 |
57
- | 1.0744 | 0.6768 | 150 | 1.1322 |
58
- | 1.2177 | 0.9024 | 200 | 1.1322 |
59
- | 1.1014 | 1.1280 | 250 | 1.1320 |
60
- | 1.1074 | 1.3536 | 300 | 1.1319 |
61
- | 1.0651 | 1.5792 | 350 | 1.1317 |
62
- | 1.1232 | 1.8049 | 400 | 1.1321 |
 
 
 
 
 
63
 
64
 
65
  ### Framework versions
 
1
  ---
2
  base_model: microsoft/Phi-3.5-mini-instruct
3
+ datasets:
4
+ - generator
5
  library_name: peft
6
  license: mit
7
  tags:
 
18
 
19
  # Phi-3.5-MultiCap-tool-embedding-past
20
 
21
+ This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.7561
24
 
25
  ## Model description
26
 
 
54
 
55
  | Training Loss | Epoch | Step | Validation Loss |
56
  |:-------------:|:------:|:----:|:---------------:|
57
+ | 1.1082 | 0.1524 | 50 | 1.1135 |
58
+ | 0.9647 | 0.3048 | 100 | 1.0051 |
59
+ | 0.9516 | 0.4571 | 150 | 0.9498 |
60
+ | 0.8882 | 0.6095 | 200 | 0.9027 |
61
+ | 0.9183 | 0.7619 | 250 | 0.8649 |
62
+ | 0.7923 | 0.9143 | 300 | 0.8355 |
63
+ | 0.8078 | 1.0667 | 350 | 0.8137 |
64
+ | 0.7677 | 1.2190 | 400 | 0.7969 |
65
+ | 0.765 | 1.3714 | 450 | 0.7822 |
66
+ | 0.812 | 1.5238 | 500 | 0.7720 |
67
+ | 0.7376 | 1.6762 | 550 | 0.7638 |
68
+ | 0.7617 | 1.8286 | 600 | 0.7586 |
69
+ | 0.7299 | 1.9810 | 650 | 0.7561 |
70
 
71
 
72
  ### Framework versions
adapter_config.json CHANGED
@@ -14,22 +14,20 @@
14
  "lora_dropout": 0.05,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
17
- "modules_to_save": [
18
- "t_proj"
19
- ],
20
  "peft_type": "LORA",
21
  "r": 16,
22
  "rank_pattern": {},
23
  "revision": null,
24
  "target_modules": [
25
- "q_proj",
26
- "v_proj",
27
- "o_proj",
28
- "qkv_proj",
29
- "k_proj",
30
  "down_proj",
 
31
  "gate_up_proj",
32
- "up_proj"
 
 
 
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
14
  "lora_dropout": 0.05,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
 
 
18
  "peft_type": "LORA",
19
  "r": 16,
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
 
 
 
23
  "down_proj",
24
+ "o_proj",
25
  "gate_up_proj",
26
+ "v_proj",
27
+ "k_proj",
28
+ "up_proj",
29
+ "q_proj",
30
+ "qkv_proj"
31
  ],
32
  "task_type": "CAUSAL_LM",
33
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bde40c21ab8e095f4a8d1038a1fe893adb57c06a4d8fbe4283601e43114b5d3a
3
- size 138458960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c486fd2ea7e3344c063eab053adcc7f3b4f5db6ddc91285ff1101cf2eefb34d
3
+ size 100697728
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:423b78a6d185c26265182b33ed6c19fea08648f4be2de00d4517b820bb9ab8e3
3
  size 5496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:820a6fd18a197c4958e64f01299d4f90193b27e5b5eb5e41a286b4a770e26256
3
  size 5496