sujithatz/finbot-transofrmer-based-phi3.5_adapter

Browse files

Files changed (5) hide show

README.md +59 -33
adapter_config.json +4 -7
adapter_model.safetensors +2 -2
tokenizer.json +1 -6
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.4084
 ## Model description
@@ -37,45 +37,71 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 0.0002
-- train_batch_size: 8
-- eval_batch_size: 8
-- seed: 42
 - gradient_accumulation_steps: 4
-- total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
-- lr_scheduler_type: linear
-- lr_scheduler_warmup_ratio: 0.1
-- num_epochs: 30
 ### Training results
 | Training Loss | Epoch   | Step | Validation Loss |
 |:-------------:|:-------:|:----:|:---------------:|
-| 1.415         | 1.1765  | 5    | 1.4148          |
-| 1.2791        | 2.3529  | 10   | 1.2542          |
-| 1.0303        | 3.5294  | 15   | 0.9828          |
-| 0.7989        | 4.7059  | 20   | 0.7193          |
-| 0.5792        | 5.8824  | 25   | 0.5793          |
-| 0.5074        | 7.0588  | 30   | 0.5133          |
-| 0.4558        | 8.2353  | 35   | 0.4714          |
-| 0.361         | 9.4118  | 40   | 0.4478          |
-| 0.3751        | 10.5882 | 45   | 0.4236          |
-| 0.2908        | 11.7647 | 50   | 0.4106          |
-| 0.263         | 12.9412 | 55   | 0.3855          |
-| 0.2515        | 14.1176 | 60   | 0.3760          |
-| 0.2391        | 15.2941 | 65   | 0.3752          |
-| 0.1973        | 16.4706 | 70   | 0.3723          |
-| 0.1638        | 17.6471 | 75   | 0.3740          |
-| 0.1776        | 18.8235 | 80   | 0.3868          |
-| 0.2008        | 20.0    | 85   | 0.3798          |
-| 0.1569        | 21.1765 | 90   | 0.3848          |
-| 0.1284        | 22.3529 | 95   | 0.3901          |
-| 0.1171        | 23.5294 | 100  | 0.3969          |
-| 0.1364        | 24.7059 | 105  | 0.3950          |
-| 0.1401        | 25.8824 | 110  | 0.4070          |
-| 0.1195        | 27.0588 | 115  | 0.4091          |
-| 0.1219        | 28.2353 | 120  | 0.4084          |
 ### Framework versions

 This model is a fine-tuned version of [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) on an unknown dataset.
 It achieves the following results on the evaluation set:
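+- Loss: 0.6126 (final checkpoint, step 250; the lowest validation loss in the training results table is 0.3422 at step 70)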
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 6e-05
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 0
 - gradient_accumulation_steps: 4
+- total_train_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 5
+- training_steps: 250
 ### Training results
 | Training Loss | Epoch   | Step | Validation Loss |
 |:-------------:|:-------:|:----:|:---------------:|
+| 1.3693        | 0.6667  | 5    | 1.3378          |
+| 1.1643        | 1.3333  | 10   | 1.1047          |
+| 0.8388        | 2.0     | 15   | 0.8767          |
+| 0.6894        | 2.6667  | 20   | 0.6828          |
+| 0.5636        | 3.3333  | 25   | 0.5688          |
+| 0.4496        | 4.0     | 30   | 0.5110          |
+| 0.3487        | 4.6667  | 35   | 0.4549          |
+| 0.3169        | 5.3333  | 40   | 0.4148          |
+| 0.2595        | 6.0     | 45   | 0.3893          |
+| 0.2002        | 6.6667  | 50   | 0.3733          |
+| 0.2437        | 7.3333  | 55   | 0.3597          |
+| 0.1669        | 8.0     | 60   | 0.3456          |
+| 0.1873        | 8.6667  | 65   | 0.3491          |
+| 0.1831        | 9.3333  | 70   | 0.3422          |
+| 0.1581        | 10.0    | 75   | 0.3664          |
+| 0.0831        | 10.6667 | 80   | 0.3644          |
+| 0.1277        | 11.3333 | 85   | 0.3822          |
+| 0.0539        | 12.0    | 90   | 0.3868          |
+| 0.0799        | 12.6667 | 95   | 0.4190          |
+| 0.066         | 13.3333 | 100  | 0.4375          |
+| 0.0564        | 14.0    | 105  | 0.4581          |
+| 0.0356        | 14.6667 | 110  | 0.4715          |
+| 0.0493        | 15.3333 | 115  | 0.4896          |
+| 0.0399        | 16.0    | 120  | 0.5066          |
+| 0.0452        | 16.6667 | 125  | 0.5022          |
+| 0.0305        | 17.3333 | 130  | 0.5246          |
+| 0.036         | 18.0    | 135  | 0.5492          |
+| 0.0282        | 18.6667 | 140  | 0.5537          |
+| 0.0327        | 19.3333 | 145  | 0.5703          |
+| 0.0341        | 20.0    | 150  | 0.5699          |
+| 0.0315        | 20.6667 | 155  | 0.5761          |
+| 0.0284        | 21.3333 | 160  | 0.5781          |
+| 0.027         | 22.0    | 165  | 0.5818          |
+| 0.0258        | 22.6667 | 170  | 0.5858          |
+| 0.0224        | 23.3333 | 175  | 0.5884          |
+| 0.0253        | 24.0    | 180  | 0.5960          |
+| 0.0232        | 24.6667 | 185  | 0.6015          |
+| 0.0256        | 25.3333 | 190  | 0.6088          |
+| 0.0226        | 26.0    | 195  | 0.6106          |
+| 0.0226        | 26.6667 | 200  | 0.6096          |
+| 0.0259        | 27.3333 | 205  | 0.6102          |
+| 0.0217        | 28.0    | 210  | 0.6100          |
+| 0.022         | 28.6667 | 215  | 0.6115          |
+| 0.0219        | 29.3333 | 220  | 0.6115          |
+| 0.0239        | 30.0    | 225  | 0.6109          |
+| 0.0226        | 30.6667 | 230  | 0.6123          |
+| 0.0219        | 31.3333 | 235  | 0.6140          |
+| 0.0201        | 32.0    | 240  | 0.6128          |
+| 0.0198        | 32.6667 | 245  | 0.6130          |
+| 0.0234        | 33.3333 | 250  | 0.6126          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -10,7 +10,7 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 16,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
@@ -20,13 +20,10 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "up_proj",
-    "down_proj",
-    "k_proj",
-    "gate_proj",
     "o_proj",
-    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 32,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "o_proj",
+    "gate_up_proj",
+    "qkv_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c31fa0d424f7aa52b479ebad75d44eddaeb88a380690ecea3897eaa386703c7a
-size 35668592

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1c60cbc92cc45ddb7014507b500e6f2777068271267e79a3be33192d32a31c4
+size 100697728

tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 512,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2dc0d2c1e7c21c088f647428d6cedc77b727218a9706b56e15904c3d278b5b2
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:b6ea568638ab5e64f42deb9addbb690de5d9709be233aa1fbb7080347e83b0a2
 size 5432