sujithatz committed on
Commit
bc428ab
1 Parent(s): 6afdc4f

sujithatz/finbot-transofrmer-based-phi3.5_adapter

Browse files
README.md CHANGED
@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.4084
22
 
23
  ## Model description
24
 
@@ -37,45 +37,71 @@ More information needed
37
  ### Training hyperparameters
38
 
39
  The following hyperparameters were used during training:
40
- - learning_rate: 0.0002
41
- - train_batch_size: 8
42
- - eval_batch_size: 8
43
- - seed: 42
44
  - gradient_accumulation_steps: 4
45
- - total_train_batch_size: 32
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
- - lr_scheduler_type: linear
48
- - lr_scheduler_warmup_ratio: 0.1
49
- - num_epochs: 30
50
 
51
  ### Training results
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:-------:|:----:|:---------------:|
55
- | 1.415 | 1.1765 | 5 | 1.4148 |
56
- | 1.2791 | 2.3529 | 10 | 1.2542 |
57
- | 1.0303 | 3.5294 | 15 | 0.9828 |
58
- | 0.7989 | 4.7059 | 20 | 0.7193 |
59
- | 0.5792 | 5.8824 | 25 | 0.5793 |
60
- | 0.5074 | 7.0588 | 30 | 0.5133 |
61
- | 0.4558 | 8.2353 | 35 | 0.4714 |
62
- | 0.361 | 9.4118 | 40 | 0.4478 |
63
- | 0.3751 | 10.5882 | 45 | 0.4236 |
64
- | 0.2908 | 11.7647 | 50 | 0.4106 |
65
- | 0.263 | 12.9412 | 55 | 0.3855 |
66
- | 0.2515 | 14.1176 | 60 | 0.3760 |
67
- | 0.2391 | 15.2941 | 65 | 0.3752 |
68
- | 0.1973 | 16.4706 | 70 | 0.3723 |
69
- | 0.1638 | 17.6471 | 75 | 0.3740 |
70
- | 0.1776 | 18.8235 | 80 | 0.3868 |
71
- | 0.2008 | 20.0 | 85 | 0.3798 |
72
- | 0.1569 | 21.1765 | 90 | 0.3848 |
73
- | 0.1284 | 22.3529 | 95 | 0.3901 |
74
- | 0.1171 | 23.5294 | 100 | 0.3969 |
75
- | 0.1364 | 24.7059 | 105 | 0.3950 |
76
- | 0.1401 | 25.8824 | 110 | 0.4070 |
77
- | 0.1195 | 27.0588 | 115 | 0.4091 |
78
- | 0.1219 | 28.2353 | 120 | 0.4084 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
 
81
  ### Framework versions
 
18
 
19
  This model is a fine-tuned version of [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.6126
22
 
23
  ## Model description
24
 
 
37
  ### Training hyperparameters
38
 
39
  The following hyperparameters were used during training:
40
+ - learning_rate: 6e-05
41
+ - train_batch_size: 4
42
+ - eval_batch_size: 4
43
+ - seed: 0
44
  - gradient_accumulation_steps: 4
45
+ - total_train_batch_size: 16
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
+ - lr_scheduler_type: cosine
48
+ - lr_scheduler_warmup_steps: 5
49
+ - training_steps: 250
50
 
51
  ### Training results
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:-------:|:----:|:---------------:|
55
+ | 1.3693 | 0.6667 | 5 | 1.3378 |
56
+ | 1.1643 | 1.3333 | 10 | 1.1047 |
57
+ | 0.8388 | 2.0 | 15 | 0.8767 |
58
+ | 0.6894 | 2.6667 | 20 | 0.6828 |
59
+ | 0.5636 | 3.3333 | 25 | 0.5688 |
60
+ | 0.4496 | 4.0 | 30 | 0.5110 |
61
+ | 0.3487 | 4.6667 | 35 | 0.4549 |
62
+ | 0.3169 | 5.3333 | 40 | 0.4148 |
63
+ | 0.2595 | 6.0 | 45 | 0.3893 |
64
+ | 0.2002 | 6.6667 | 50 | 0.3733 |
65
+ | 0.2437 | 7.3333 | 55 | 0.3597 |
66
+ | 0.1669 | 8.0 | 60 | 0.3456 |
67
+ | 0.1873 | 8.6667 | 65 | 0.3491 |
68
+ | 0.1831 | 9.3333 | 70 | 0.3422 |
69
+ | 0.1581 | 10.0 | 75 | 0.3664 |
70
+ | 0.0831 | 10.6667 | 80 | 0.3644 |
71
+ | 0.1277 | 11.3333 | 85 | 0.3822 |
72
+ | 0.0539 | 12.0 | 90 | 0.3868 |
73
+ | 0.0799 | 12.6667 | 95 | 0.4190 |
74
+ | 0.066 | 13.3333 | 100 | 0.4375 |
75
+ | 0.0564 | 14.0 | 105 | 0.4581 |
76
+ | 0.0356 | 14.6667 | 110 | 0.4715 |
77
+ | 0.0493 | 15.3333 | 115 | 0.4896 |
78
+ | 0.0399 | 16.0 | 120 | 0.5066 |
79
+ | 0.0452 | 16.6667 | 125 | 0.5022 |
80
+ | 0.0305 | 17.3333 | 130 | 0.5246 |
81
+ | 0.036 | 18.0 | 135 | 0.5492 |
82
+ | 0.0282 | 18.6667 | 140 | 0.5537 |
83
+ | 0.0327 | 19.3333 | 145 | 0.5703 |
84
+ | 0.0341 | 20.0 | 150 | 0.5699 |
85
+ | 0.0315 | 20.6667 | 155 | 0.5761 |
86
+ | 0.0284 | 21.3333 | 160 | 0.5781 |
87
+ | 0.027 | 22.0 | 165 | 0.5818 |
88
+ | 0.0258 | 22.6667 | 170 | 0.5858 |
89
+ | 0.0224 | 23.3333 | 175 | 0.5884 |
90
+ | 0.0253 | 24.0 | 180 | 0.5960 |
91
+ | 0.0232 | 24.6667 | 185 | 0.6015 |
92
+ | 0.0256 | 25.3333 | 190 | 0.6088 |
93
+ | 0.0226 | 26.0 | 195 | 0.6106 |
94
+ | 0.0226 | 26.6667 | 200 | 0.6096 |
95
+ | 0.0259 | 27.3333 | 205 | 0.6102 |
96
+ | 0.0217 | 28.0 | 210 | 0.6100 |
97
+ | 0.022 | 28.6667 | 215 | 0.6115 |
98
+ | 0.0219 | 29.3333 | 220 | 0.6115 |
99
+ | 0.0239 | 30.0 | 225 | 0.6109 |
100
+ | 0.0226 | 30.6667 | 230 | 0.6123 |
101
+ | 0.0219 | 31.3333 | 235 | 0.6140 |
102
+ | 0.0201 | 32.0 | 240 | 0.6128 |
103
+ | 0.0198 | 32.6667 | 245 | 0.6130 |
104
+ | 0.0234 | 33.3333 | 250 | 0.6126 |
105
 
106
 
107
  ### Framework versions
adapter_config.json CHANGED
@@ -10,7 +10,7 @@
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
- "lora_alpha": 16,
14
  "lora_dropout": 0.05,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
@@ -20,13 +20,10 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "q_proj",
24
- "up_proj",
25
- "down_proj",
26
- "k_proj",
27
- "gate_proj",
28
  "o_proj",
29
- "v_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
+ "lora_alpha": 32,
14
  "lora_dropout": 0.05,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
 
 
 
23
  "o_proj",
24
+ "gate_up_proj",
25
+ "qkv_proj",
26
+ "down_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c31fa0d424f7aa52b479ebad75d44eddaeb88a380690ecea3897eaa386703c7a
3
- size 35668592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1c60cbc92cc45ddb7014507b500e6f2777068271267e79a3be33192d32a31c4
3
+ size 100697728
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 512,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2dc0d2c1e7c21c088f647428d6cedc77b727218a9706b56e15904c3d278b5b2
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6ea568638ab5e64f42deb9addbb690de5d9709be233aa1fbb7080347e83b0a2
3
  size 5432