ShilpaSandhya committed
Commit 772827e • Parent: f5d77b3
ShilpaSandhya/phi3_5_mini_lora_chemical_eng_flash

Files changed:
- README.md (+8 -9)
- adapter_config.json (+29 -29)
- adapter_model.safetensors (+1 -1)
- training_args.bin (+1 -1)
README.md CHANGED

```diff
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
+- Loss: 0.0794
 
 ## Model description
 
@@ -44,20 +44,19 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.03
-- num_epochs:
+- num_epochs: 5
 - mixed_precision_training: Native AMP
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 0.
-| 0.
-| 0.
-| 0.0782 | 2.9277 | 400 | 0.
-| 0.
-| 0.
-| 0.0717 | 5.1235 | 700 | 0.0805 |
+| 0.0874 | 0.7319 | 100 | 0.0869 |
+| 0.0781 | 1.4639 | 200 | 0.0809 |
+| 0.0764 | 2.1958 | 300 | 0.0800 |
+| 0.0782 | 2.9277 | 400 | 0.0794 |
+| 0.0769 | 3.6597 | 500 | 0.0795 |
+| 0.0697 | 4.3916 | 600 | 0.0794 |
 
 
 ### Framework versions
```
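For reference, a minimal sketch of how the hyperparameters listed in the README hunk above map onto `transformers.TrainingArguments`. Anything the diff does not show (output path, learning rate, batch sizes) is a hypothetical placeholder, not the author's setting:

```python
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="phi3_5_mini_lora_chemical_eng_flash",  # hypothetical path
    num_train_epochs=5,           # num_epochs: 5 (filled in by this commit)
    lr_scheduler_type="cosine",   # lr_scheduler_type: cosine
    warmup_ratio=0.03,            # lr_scheduler_warmup_ratio: 0.03
    adam_beta1=0.9,               # Adam with betas=(0.9, 0.999)
    adam_beta2=0.999,
    adam_epsilon=1e-08,           # epsilon=1e-08
    fp16=True,                    # mixed_precision_training: Native AMP
    learning_rate=2e-4,           # placeholder: not shown in this diff
)
```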
adapter_config.json CHANGED

```diff
@@ -20,46 +20,46 @@
 "rank_pattern": {},
 "revision": null,
 "target_modules": [
-"model.layers.5.self_attn.qkv_proj",
 "model.layers.7.mlp.gate_up_proj",
-"model.layers.
-"model.layers.
-"model.layers.
+"model.layers.9.self_attn.qkv_proj",
+"model.layers.3.self_attn.qkv_proj",
+"model.layers.6.self_attn.o_proj",
+"model.layers.0.mlp.gate_up_proj",
+"model.layers.6.self_attn.qkv_proj",
+"model.layers.5.mlp.gate_up_proj",
+"model.layers.4.self_attn.o_proj",
+"model.layers.7.self_attn.o_proj",
+"model.layers.8.mlp.down_proj",
 "model.layers.5.mlp.down_proj",
-"model.layers.
+"model.layers.6.mlp.down_proj",
+"model.layers.7.mlp.down_proj",
 "model.layers.8.self_attn.o_proj",
-"model.layers.
-"model.layers.
-"model.layers.
+"model.layers.1.mlp.down_proj",
+"model.layers.0.mlp.down_proj",
+"model.layers.9.mlp.down_proj",
+"model.layers.1.mlp.gate_up_proj",
 "model.layers.2.self_attn.qkv_proj",
 "model.layers.6.mlp.gate_up_proj",
-"model.layers.9.self_attn.qkv_proj",
-"model.layers.4.mlp.down_proj",
-"model.layers.3.mlp.down_proj",
-"model.layers.0.mlp.gate_up_proj",
-"model.layers.4.mlp.gate_up_proj",
-"model.layers.8.self_attn.qkv_proj",
-"model.layers.2.mlp.gate_up_proj",
-"model.layers.1.mlp.gate_up_proj",
-"model.layers.1.mlp.down_proj",
 "model.layers.9.self_attn.o_proj",
 "model.layers.1.self_attn.o_proj",
+"model.layers.0.self_attn.qkv_proj",
+"model.layers.2.self_attn.o_proj",
+"model.layers.4.self_attn.qkv_proj",
+"model.layers.5.self_attn.qkv_proj",
+"model.layers.2.mlp.down_proj",
 "model.layers.3.self_attn.o_proj",
-"model.layers.
-"model.layers.
-"model.layers.
+"model.layers.1.self_attn.qkv_proj",
+"model.layers.3.mlp.gate_up_proj",
+"model.layers.7.self_attn.qkv_proj",
+"model.layers.4.mlp.gate_up_proj",
+"model.layers.2.mlp.gate_up_proj",
+"model.layers.5.self_attn.o_proj",
 "model.layers.9.mlp.gate_up_proj",
-"model.layers.4.
-"model.layers.9.mlp.down_proj",
-"model.layers.5.mlp.gate_up_proj",
-"model.layers.6.self_attn.qkv_proj",
-"model.layers.2.mlp.down_proj",
-"model.layers.7.mlp.down_proj",
-"model.layers.6.mlp.down_proj",
+"model.layers.4.mlp.down_proj",
 "model.layers.0.self_attn.o_proj",
 "model.layers.8.mlp.gate_up_proj",
-"model.layers.
-"model.layers.
+"model.layers.8.self_attn.qkv_proj",
+"model.layers.3.mlp.down_proj"
 ],
 "task_type": "CAUSAL_LM",
 "use_dora": false,
```
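Taken together, the new `target_modules` list covers all four projections (`self_attn.qkv_proj`, `self_attn.o_proj`, `mlp.gate_up_proj`, `mlp.down_proj`) in layers 0 through 9, 40 modules in total. A minimal sketch of reproducing that list with a PEFT `LoraConfig`; the rank, alpha, and dropout are placeholders, since this hunk only shows `target_modules`, `task_type`, and `use_dora`:

```python
from peft import LoraConfig

# Rebuild the updated target_modules set: every attention and MLP
# projection in layers 0-9. The diff lists them in a different order,
# but ordering does not matter for module matching.
target_modules = [
    f"model.layers.{i}.{mod}"
    for i in range(10)
    for mod in (
        "self_attn.qkv_proj",
        "self_attn.o_proj",
        "mlp.gate_up_proj",
        "mlp.down_proj",
    )
]

lora_config = LoraConfig(
    task_type="CAUSAL_LM",        # from the unchanged context lines
    use_dora=False,
    target_modules=target_modules,
    r=16,                         # placeholder: rank is not shown in this hunk
    lora_alpha=32,                # placeholder
)
```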
adapter_model.safetensors CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:98db53b5bc1313351941c78a90d09712a3eb18c12ee574ff87e18d490cff40a9
 size 31467968
```
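This file is the adapter weights themselves (about 31 MB), stored as a Git LFS pointer. A minimal sketch, assuming the `transformers` and `peft` packages, of applying the adapter to the base model named in the README:

```python
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Load the base model, then attach this repo's LoRA adapter on top of it.
base = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3.5-mini-instruct")
model = PeftModel.from_pretrained(
    base, "ShilpaSandhya/phi3_5_mini_lora_chemical_eng_flash"
)
```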
training_args.bin CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f0baf4c802280ead6fd32e178dadafaed4d895f8d1a868f5e768f5b85167071c
 size 5240
```
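Both binary files change only in their LFS `oid`; the actual bytes live in LFS storage. A minimal sketch (not part of the repo) of verifying a downloaded file against the sha256 recorded in its pointer:

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash the file in chunks so large weight files need not fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "f0baf4c802280ead6fd32e178dadafaed4d895f8d1a868f5e768f5b85167071c"
assert sha256_of("training_args.bin") == expected  # oid from the pointer above
```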