ShilpaSandhya committed
Commit 772827e
1 Parent(s): f5d77b3

ShilpaSandhya/phi3_5_mini_lora_chemical_eng_flash

README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on the None dataset.
 It achieves the following results on the evaluation set:
- - Loss: 0.0793
+ - Loss: 0.0794
 
 ## Model description
 
@@ -44,20 +44,19 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.03
- - num_epochs: 10
+ - num_epochs: 5
 - mixed_precision_training: Native AMP
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
- | 0.0877 | 0.7319 | 100 | 0.0874 |
- | 0.0785 | 1.4639 | 200 | 0.0812 |
- | 0.0763 | 2.1958 | 300 | 0.0799 |
- | 0.0782 | 2.9277 | 400 | 0.0793 |
- | 0.0764 | 3.6597 | 500 | 0.0795 |
- | 0.0683 | 4.3916 | 600 | 0.0798 |
- | 0.0717 | 5.1235 | 700 | 0.0805 |
+ | 0.0874 | 0.7319 | 100 | 0.0869 |
+ | 0.0781 | 1.4639 | 200 | 0.0809 |
+ | 0.0764 | 2.1958 | 300 | 0.0800 |
+ | 0.0782 | 2.9277 | 400 | 0.0794 |
+ | 0.0769 | 3.6597 | 500 | 0.0795 |
+ | 0.0697 | 4.3916 | 600 | 0.0794 |
 
 
 ### Framework versions
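The README change records a shorter rerun: num_epochs drops from 10 to 5, the results table now stops at step 600 (epoch ~4.4) instead of step 700, and the headline evaluation loss moves from 0.0793 to 0.0794. As a rough sketch, the card's values map onto `transformers.TrainingArguments` as below; the output path and the 100-step eval cadence are inferred from the table, and anything not listed in the card is an assumption, not part of this commit.

```python
from transformers import TrainingArguments

# A sketch of the rerun's configuration, assuming the standard Trainer API.
# Only the values listed in the model card above come from this commit.
training_args = TrainingArguments(
    output_dir="phi3_5_mini_lora_chemical_eng_flash",  # assumed path
    num_train_epochs=5,      # reduced from 10 in this commit
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    optim="adamw_torch",     # the card's betas=(0.9,0.999), eps=1e-08 are the defaults
    fp16=True,               # "Native AMP" mixed precision
    eval_strategy="steps",   # `evaluation_strategy` on older transformers versions
    eval_steps=100,          # matches the 100-step rows in the results table
    logging_steps=100,
)
```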
adapter_config.json CHANGED
@@ -20,46 +20,46 @@
 "rank_pattern": {},
 "revision": null,
 "target_modules": [
- "model.layers.5.self_attn.qkv_proj",
 "model.layers.7.mlp.gate_up_proj",
- "model.layers.5.self_attn.o_proj",
- "model.layers.0.mlp.down_proj",
- "model.layers.0.self_attn.qkv_proj",
+ "model.layers.9.self_attn.qkv_proj",
+ "model.layers.3.self_attn.qkv_proj",
+ "model.layers.6.self_attn.o_proj",
+ "model.layers.0.mlp.gate_up_proj",
+ "model.layers.6.self_attn.qkv_proj",
+ "model.layers.5.mlp.gate_up_proj",
+ "model.layers.4.self_attn.o_proj",
+ "model.layers.7.self_attn.o_proj",
+ "model.layers.8.mlp.down_proj",
 "model.layers.5.mlp.down_proj",
- "model.layers.1.self_attn.qkv_proj",
+ "model.layers.6.mlp.down_proj",
+ "model.layers.7.mlp.down_proj",
 "model.layers.8.self_attn.o_proj",
- "model.layers.7.self_attn.o_proj",
- "model.layers.3.mlp.gate_up_proj",
- "model.layers.4.self_attn.qkv_proj",
+ "model.layers.1.mlp.down_proj",
+ "model.layers.0.mlp.down_proj",
+ "model.layers.9.mlp.down_proj",
+ "model.layers.1.mlp.gate_up_proj",
 "model.layers.2.self_attn.qkv_proj",
 "model.layers.6.mlp.gate_up_proj",
- "model.layers.9.self_attn.qkv_proj",
- "model.layers.4.mlp.down_proj",
- "model.layers.3.mlp.down_proj",
- "model.layers.0.mlp.gate_up_proj",
- "model.layers.4.mlp.gate_up_proj",
- "model.layers.8.self_attn.qkv_proj",
- "model.layers.2.mlp.gate_up_proj",
- "model.layers.1.mlp.gate_up_proj",
- "model.layers.1.mlp.down_proj",
 "model.layers.9.self_attn.o_proj",
 "model.layers.1.self_attn.o_proj",
+ "model.layers.0.self_attn.qkv_proj",
+ "model.layers.2.self_attn.o_proj",
+ "model.layers.4.self_attn.qkv_proj",
+ "model.layers.5.self_attn.qkv_proj",
+ "model.layers.2.mlp.down_proj",
 "model.layers.3.self_attn.o_proj",
- "model.layers.3.self_attn.qkv_proj",
- "model.layers.6.self_attn.o_proj",
- "model.layers.8.mlp.down_proj",
+ "model.layers.1.self_attn.qkv_proj",
+ "model.layers.3.mlp.gate_up_proj",
+ "model.layers.7.self_attn.qkv_proj",
+ "model.layers.4.mlp.gate_up_proj",
+ "model.layers.2.mlp.gate_up_proj",
+ "model.layers.5.self_attn.o_proj",
 "model.layers.9.mlp.gate_up_proj",
- "model.layers.4.self_attn.o_proj",
- "model.layers.9.mlp.down_proj",
- "model.layers.5.mlp.gate_up_proj",
- "model.layers.6.self_attn.qkv_proj",
- "model.layers.2.mlp.down_proj",
- "model.layers.7.mlp.down_proj",
- "model.layers.6.mlp.down_proj",
+ "model.layers.4.mlp.down_proj",
 "model.layers.0.self_attn.o_proj",
 "model.layers.8.mlp.gate_up_proj",
- "model.layers.2.self_attn.o_proj",
- "model.layers.7.self_attn.qkv_proj"
+ "model.layers.8.self_attn.qkv_proj",
+ "model.layers.3.mlp.down_proj"
 ],
 "task_type": "CAUSAL_LM",
 "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:cbade2f32025813d33c877864cd0cd157231b4aaae678630775fed275f3dcceb
+ oid sha256:98db53b5bc1313351941c78a90d09712a3eb18c12ee574ff87e18d490cff40a9
 size 31467968
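The pointer update above swaps in the retrained LoRA weights: the payload stays 31,467,968 bytes, only the content hash changes. A minimal sketch of loading the updated adapter on top of the base model via `peft` (dtype and device placement are assumptions, not part of the commit):

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the base model the adapter was trained against.
base = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3.5-mini-instruct",
    torch_dtype=torch.bfloat16,  # assumed; any supported dtype works
    device_map="auto",           # requires `accelerate`
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")

# Downloads adapter_config.json and adapter_model.safetensors — the LFS
# object whose sha256 pointer changed in this commit.
model = PeftModel.from_pretrained(
    base, "ShilpaSandhya/phi3_5_mini_lora_chemical_eng_flash"
)
model.eval()
```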
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:d0d6173d4765145951a74077b323d1ebcd88d532662346e8478d7cac96068d33
+ oid sha256:f0baf4c802280ead6fd32e178dadafaed4d895f8d1a868f5e768f5b85167071c
 size 5240