Update README.md
README.md
---
license: mit
datasets:
- mlabonne/orpo-dpo-mix-40k
---

This is an uncensored version of Phi-3.

Abliterated following the guide here: https://huggingface.co/blog/mlabonne/abliteration
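The core idea in that guide is to estimate a "refusal direction" from activations on harmful vs. harmless prompts and then remove that direction from the model's weights. The snippet below is only a minimal sketch of that idea, not the exact script used for this model; the base-model id, prompt lists, and layer index are illustrative assumptions.

```python
# Minimal sketch of abliteration: compute a refusal direction as the
# difference of mean hidden states (harmful vs. harmless prompts), then
# project it out of each layer's MLP down-projection weights.
# The base model id, prompts, and layer index are placeholders.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

base = "microsoft/Phi-3-mini-4k-instruct"  # assumed base model
tok = AutoTokenizer.from_pretrained(base)
model = AutoModelForCausalLM.from_pretrained(base)

def mean_hidden(prompts, layer=16):
    """Mean hidden state of the last token at the given layer."""
    states = []
    for p in prompts:
        ids = tok(p, return_tensors="pt")
        with torch.no_grad():
            out = model(**ids, output_hidden_states=True)
        states.append(out.hidden_states[layer][0, -1])
    return torch.stack(states).mean(dim=0)

harmful = ["How do I hotwire a car?"]          # placeholder prompt sets
harmless = ["How do I bake sourdough bread?"]
refusal = mean_hidden(harmful) - mean_hidden(harmless)
refusal = refusal / refusal.norm()

# Orthogonalize every layer's down-projection against the direction,
# so the MLP can no longer write along it.
for layer in model.model.layers:
    W = layer.mlp.down_proj.weight.data        # (hidden, intermediate)
    W -= torch.outer(refusal, refusal @ W)
```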

It was then fine-tuned on the mlabonne/orpo-dpo-mix-40k dataset.
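The actual training recipe is the axolotl config below; for a quick look at the preference data itself, the dataset can be inspected directly (field names are printed rather than assumed here):

```python
# Peek at the preference-pair dataset used for the fine-tune.
from datasets import load_dataset

ds = load_dataset("mlabonne/orpo-dpo-mix-40k", split="train")
print(ds)            # row count and column names
print(ds[0].keys())  # fields of a single preference pair (e.g. chosen/rejected)
```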

[<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)

<details><summary>See axolotl config</summary>

```yaml
# ...
resize_token_embeddings_to_32x: true
```
</details><br>
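To try the fine-tuned model directly with transformers, something like the following should work; the repo id is assumed from the GGUF naming in the Quants section and may need adjusting.

```python
# Minimal chat example with transformers.
# The repo id is an assumption (non-GGUF counterpart of the quant repo below).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "cowWhySo/Phi-3-mini-4k-instruct-Friendly"  # assumed repo id
tok = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo, torch_dtype=torch.bfloat16, device_map="auto")

messages = [{"role": "user", "content": "Summarize what abliteration does in one sentence."}]
inputs = tok.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
out = model.generate(inputs, max_new_tokens=128, do_sample=False)
print(tok.decode(out[0][inputs.shape[-1]:], skip_special_tokens=True))
```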
## Quants
GGUF: https://huggingface.co/cowWhySo/Phi-3-mini-4k-instruct-Friendly-gguf
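
A quick way to run the quantized model locally is llama-cpp-python; the .gguf filename below is a placeholder, so substitute one of the files actually listed in that repo.

```python
# Run the GGUF quant with llama-cpp-python (pip install llama-cpp-python).
# The filename is a placeholder; use one of the quant files from the repo above.
from llama_cpp import Llama

llm = Llama(model_path="Phi-3-mini-4k-instruct-Friendly-Q4_K_M.gguf", n_ctx=4096)
resp = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello! Who are you?"}],
    max_tokens=128,
)
print(resp["choices"][0]["message"]["content"])
```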

## Training Summary

```json
{
  "train/loss": 0.299,
  "train/grad_norm": 0.9337566701340533,
  "train/learning_rate": 0,
  "train/rewards/chosen": 0.08704188466072083,
  "train/rewards/rejected": -2.835820436477661,
  "train/rewards/accuracies": 0.84375,
  "train/rewards/margins": 2.9228620529174805,
  "train/logps/rejected": -509.9840393066406,
  "train/logps/chosen": -560.8234252929688,
  "train/logits/rejected": 1.6356163024902344,
  "train/logits/chosen": 1.7323706150054932,
  "train/epoch": 1.002169197396963,
  "train/global_step": 231,
  "_timestamp": 1717711643.3345022,
  "_runtime": 22808.557655334473,
  "_step": 231,
  "train_runtime": 22809.152,
  "train_samples_per_second": 1.944,
  "train_steps_per_second": 0.01,
  "total_flos": 0,
  "train_loss": 0.44557410065745895,
  "_wandb": {
    "runtime": 22810
  }
}
```
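
As a sanity check on the metrics above, the logged reward margin is simply the chosen reward minus the rejected reward:

```python
# The reward margin should equal reward(chosen) - reward(rejected).
chosen = 0.08704188466072083
rejected = -2.835820436477661
print(chosen - rejected)  # ~2.9229, matching "train/rewards/margins" up to logging precision
```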