southmost committed on
Commit
e316c98
1 Parent(s): 8d96815

southmost/phi2-lora-distilabel-intel-orca-dpo-pairs-init

Browse files
README.md CHANGED
@@ -18,15 +18,15 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [microsoft/phi-2](https://huggingface.co/microsoft/phi-2) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.4547
22
- - Rewards/chosen: -0.0932
23
- - Rewards/rejected: -1.3103
24
- - Rewards/accuracies: 0.8386
25
- - Rewards/margins: 1.2171
26
- - Logps/rejected: -222.2418
27
- - Logps/chosen: -199.7473
28
- - Logits/rejected: 0.5130
29
- - Logits/chosen: 0.3441
30
 
31
  ## Model description
32
 
@@ -60,13 +60,13 @@ The following hyperparameters were used during training:
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
- | 0.5579 | 0.78 | 250 | 0.4547 | -0.0932 | -1.3103 | 0.8386 | 1.2171 | -222.2418 | -199.7473 | 0.5130 | 0.3441 |
64
 
65
 
66
  ### Framework versions
67
 
68
- - PEFT 0.8.2
69
- - Transformers 4.37.2
70
- - Pytorch 2.2.0+cu121
71
- - Datasets 2.17.0
72
  - Tokenizers 0.15.2
 
18
 
19
  This model is a fine-tuned version of [microsoft/phi-2](https://huggingface.co/microsoft/phi-2) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.4467
22
+ - Rewards/chosen: -0.0981
23
+ - Rewards/rejected: -1.3106
24
+ - Rewards/accuracies: 0.8410
25
+ - Rewards/margins: 1.2125
26
+ - Logps/rejected: -228.4777
27
+ - Logps/chosen: -209.0628
28
+ - Logits/rejected: 0.4528
29
+ - Logits/chosen: 0.2946
30
 
31
  ## Model description
32
 
 
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
+ | 0.5578 | 0.78 | 250 | 0.4467 | -0.0981 | -1.3106 | 0.8410 | 1.2125 | -228.4777 | -209.0628 | 0.4528 | 0.2946 |
64
 
65
 
66
  ### Framework versions
67
 
68
+ - PEFT 0.9.0
69
+ - Transformers 4.38.2
70
+ - Pytorch 2.1.0+cu118
71
+ - Datasets 2.18.0
72
  - Tokenizers 0.15.2
adapter_config.json CHANGED
@@ -19,12 +19,13 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
 
 
22
  "fc2",
23
  "q_proj",
24
- "k_proj",
25
- "fc1",
26
  "v_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
 
29
  "use_rslora": false
30
  }
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
+ "fc1",
23
+ "k_proj",
24
  "fc2",
25
  "q_proj",
 
 
26
  "v_proj"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
+ "use_dora": false,
30
  "use_rslora": false
31
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c945a28678bc285f469548170ea6c61f4ac181d365413b7e0be3345e0e70a9e
3
  size 167814424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0554899608cddc7554993c55ba320691f2dc5147a342d877731dbcc2946a7d8a
3
  size 167814424
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16ab228666e6daad95fa26140b1b5ce43e3675dd453bc14833bdd9a7923cb0bd
3
- size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6af5bb41ca4173df8ec08e5e3443d56547ad580089b002e895a1309007ad0395
3
+ size 4920