alpayariyak
committed on
Commit
•
29b3d50
1
Parent(s):
96cc489
Update README.md
Browse files
README.md
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
<div align="center">
|
2 |
<img src="https://raw.githubusercontent.com/imoneoi/openchat/master/assets/logo_new.png" style="width: 65%">
|
3 |
<h1>Advancing Open-source Language Models with Mixed-Quality Data</h1>
|
@@ -23,31 +24,48 @@
|
|
23 |
</p>
|
24 |
|
25 |
<hr>
|
26 |
-
<div style="background-color: white; padding: 0.7em; border-radius: 0.5em; color: black; display: flex; flex-direction: column; justify-content: center; text-align: center;
|
27 |
<a href="https://huggingface.co/openchat/openchat_3.5" style="text-decoration: none; color: black;">
|
28 |
-
|
29 |
-
<span style="font-size: 1.7em; font-family: 'Helvetica'; letter-spacing: 0.1em; font-weight: bold; color: black;">OPENCHAT</span><span style="font-size: 1.8em; font-family: 'Helvetica'; color: #3c72db;
|
30 |
-
|
31 |
-
<span style="font-size:
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
font-family: 'Helvetica'; color: black; font-weight: bold;">
|
36 |
-
|
37 |
-
<br
|
38 |
-
<br>
|
|
|
39 |
</span>
|
40 |
</a>
|
41 |
</div>
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
<div style="display: flex; justify-content: center; align-items: center">
|
44 |
-
<img src="https://github.com/alpayariyak/openchat/blob/master/assets/1210bench.png?raw=true" style="width: 100%; border-radius: 1em">
|
45 |
</div>
|
46 |
|
47 |
<div>
|
48 |
<h3> Table of Contents</h3>
|
49 |
</div>
|
50 |
|
|
|
51 |
1. [Usage](#usage)
|
52 |
2. [Benchmarks](#benchmarks)
|
53 |
3. [Limitations](#limitations)
|
@@ -174,7 +192,6 @@ Score 5: {orig_score5_description}
|
|
174 |
| OpenOrca Mistral | 7B | 52.7 | 6.86 | 38.4 | 49.4 | 42.9 | 45.9 | 59.3 | 59.1 | 58.1 |
|
175 |
| Zephyr-β^ | 7B | 34.6 | 7.34 | 22.0 | 40.6 | 39.0 | 40.8 | 39.8 | 5.1 | 16.0 |
|
176 |
| Mistral | 7B | - | 6.84 | 30.5 | 39.0 | 38.0 | - | 60.1 | 52.2 | - |
|
177 |
-
|
178 |
<details>
|
179 |
<summary>Evaluation Details (click to expand)</summary>
|
180 |
*: ChatGPT (March) results are from [GPT-4 Technical Report](https://arxiv.org/abs/2303.08774), [Chain-of-Thought Hub](https://github.com/FranxYao/chain-of-thought-hub), and our evaluation. Please note that ChatGPT is not a fixed baseline and evolves rapidly over time.
|
@@ -189,6 +206,7 @@ All models are evaluated in chat mode (e.g. with the respective conversation tem
|
|
189 |
<h3>HumanEval+</h3>
|
190 |
</div>
|
191 |
|
|
|
192 |
| Model | Size | HumanEval+ pass@1 |
|
193 |
|-----------------------------|----------|------------|
|
194 |
| ChatGPT (December 12, 2023) | - | 64.6 |
|
@@ -209,6 +227,12 @@ All models are evaluated in chat mode (e.g. with the respective conversation tem
|
|
209 |
|
210 |
*: Grok results are reported by [X.AI](https://x.ai/).
|
211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
<div align="center">
|
213 |
<h2> Limitations </h2>
|
214 |
</div>
|
@@ -226,6 +250,7 @@ OpenChat may sometimes generate information that does not exist or is not accura
|
|
226 |
**Safety**
|
227 |
OpenChat may sometimes generate harmful content, hate speech, or biased responses, or answer unsafe questions. It's crucial to apply additional AI safety measures in use cases that require safe and moderated responses.
|
228 |
|
|
|
229 |
<div align="center">
|
230 |
<h2> License </h2>
|
231 |
</div>
|
@@ -251,7 +276,6 @@ OpenChat 3.5 was trained with C-RLFT on a collection of publicly available high-
|
|
251 |
<div align="center">
|
252 |
<h2> Citation </h2>
|
253 |
</div>
|
254 |
-
|
255 |
```
|
256 |
@article{wang2023openchat,
|
257 |
title={OpenChat: Advancing Open-source Language Models with Mixed-Quality Data},
|
|
|
1 |
+
|
2 |
<div align="center">
|
3 |
<img src="https://raw.githubusercontent.com/imoneoi/openchat/master/assets/logo_new.png" style="width: 65%">
|
4 |
<h1>Advancing Open-source Language Models with Mixed-Quality Data</h1>
|
|
|
24 |
</p>
|
25 |
|
26 |
<hr>
|
27 |
+
<div style="background-color: white; padding: 0.7em; border-radius: 0.5em; color: black; display: flex; flex-direction: column; justify-content: center; text-align: center;">
|
28 |
<a href="https://huggingface.co/openchat/openchat_3.5" style="text-decoration: none; color: black;">
|
29 |
+
<span style="font-size: 0.7em; font-family: 'Helvetica'; color: white; background-color:white; border-radius: 6em; padding: 0.04em 0.4em; letter-spacing: 0.1em; font-weight: bold">3.51210</span>
|
30 |
+
<span style="font-size: 1.7em; font-family: 'Helvetica'; letter-spacing: 0.1em; font-weight: bold; color: black;">OPENCHAT</span><span style="font-size: 1.8em; font-family: 'Helvetica'; color: #3c72db;">3.5</span>
|
31 |
+
<span style="font-size: 0.7em; font-family: 'Helvetica'; color: white; background-color:red; border-radius: 6em; padding: 0.066em 0.4em; letter-spacing: 0.1em; font-weight: bold; vertical-align: top;">1210</span><br>
|
32 |
+
<span style="font-size: 2vw; font-family: 'Helvetica'; color: black; white-space: nowrap;">
|
33 |
+
🏆 The Overall Best Performing Open Source 7B Model 🏆
|
34 |
+
</span>
|
35 |
+
<br> <span style="font-size: 2vw; font-family: 'Helvetica'; color: black; white-space: nowrap;">🤖 Outperforms <span style="font-weight: bold;">ChatGPT</span> (March) and <span style="font-weight: bold;">Grok-1</span> on most benchmarks 🤖</span>
|
36 |
+
<br> <span style="font-size: 2vw; font-family: 'Helvetica'; color: black; white-space: nowrap;">🚀 <span style="font-size: 1em; font-family: 'Helvetica'; color: black; font-weight: bold;">15</span>-point improvement in Coding Performance over <span style="font-size: 0.9em;
|
37 |
+
font-family: 'Helvetica'; color: black; font-weight: bold;">OpenChat-3.5 🚀</span></span>
|
38 |
+
<br><span style="font-size: 2vw; font-family: 'Helvetica'; color: #3c72db; font-weight: bold; white-space: nowrap;">New Features</span>
|
39 |
+
<br> <span style="font-size: 2vw; font-family: 'Helvetica'; color: black; white-space: nowrap;">💡 2 Modes: Coding + Generalist, Mathematical Reasoning 💡</span>
|
40 |
+
<br><span style="font-size: 2vw; font-family: 'Helvetica'; color: black; white-space: nowrap;"> 🧑‍⚖️ Experimental support for Evaluator and Feedback capabilities 🧑‍⚖️</span>
|
41 |
</span>
|
42 |
</a>
|
43 |
</div>
|
44 |
|
45 |
+
<!-- <a href="https://huggingface.co/openchat/openchat_3.5">
|
46 |
+
<button class="common-button">Model Repo</button>
|
47 |
+
</a>
|
48 |
+
<a href="https://openchat.team">
|
49 |
+
<button class="common-button">OpenChatUI Demo</button>
|
50 |
+
</a>
|
51 |
+
<a href="https://huggingface.co/spaces/openchat/openchat_3.5">
|
52 |
+
<button class="common-button">HuggingFace Space</button>
|
53 |
+
</a>
|
54 |
+
<a href="https://arxiv.org/pdf/2309.11235.pdf">
|
55 |
+
<button class="common-button">Paper</button>
|
56 |
+
</a>
|
57 |
+
-->
|
58 |
+
</p>
|
59 |
+
|
60 |
<div style="display: flex; justify-content: center; align-items: center">
|
61 |
+
<img src="https://github.com/alpayariyak/openchat/blob/master/assets/1210bench.png?raw=true" style="width: 100%; border-radius: 1em">
|
62 |
</div>
|
63 |
|
64 |
<div>
|
65 |
<h3> Table of Contents</h3>
|
66 |
</div>
|
67 |
|
68 |
+
|
69 |
1. [Usage](#usage)
|
70 |
2. [Benchmarks](#benchmarks)
|
71 |
3. [Limitations](#limitations)
|
|
|
192 |
| OpenOrca Mistral | 7B | 52.7 | 6.86 | 38.4 | 49.4 | 42.9 | 45.9 | 59.3 | 59.1 | 58.1 |
|
193 |
| Zephyr-β^ | 7B | 34.6 | 7.34 | 22.0 | 40.6 | 39.0 | 40.8 | 39.8 | 5.1 | 16.0 |
|
194 |
| Mistral | 7B | - | 6.84 | 30.5 | 39.0 | 38.0 | - | 60.1 | 52.2 | - |
|
|
|
195 |
<details>
|
196 |
<summary>Evaluation Details (click to expand)</summary>
|
197 |
*: ChatGPT (March) results are from [GPT-4 Technical Report](https://arxiv.org/abs/2303.08774), [Chain-of-Thought Hub](https://github.com/FranxYao/chain-of-thought-hub), and our evaluation. Please note that ChatGPT is not a fixed baseline and evolves rapidly over time.
|
|
|
206 |
<h3>HumanEval+</h3>
|
207 |
</div>
|
208 |
|
209 |
+
|
210 |
| Model | Size | HumanEval+ pass@1 |
|
211 |
|-----------------------------|----------|------------|
|
212 |
| ChatGPT (December 12, 2023) | - | 64.6 |
|
|
|
227 |
|
228 |
*: Grok results are reported by [X.AI](https://x.ai/).
|
229 |
|
230 |
+
|
231 |
+
|
232 |
+
|
233 |
+
|
234 |
+
|
235 |
+
|
236 |
<div align="center">
|
237 |
<h2> Limitations </h2>
|
238 |
</div>
|
|
|
250 |
**Safety**
|
251 |
OpenChat may sometimes generate harmful content, hate speech, or biased responses, or answer unsafe questions. It's crucial to apply additional AI safety measures in use cases that require safe and moderated responses.
|
252 |
|
253 |
+
## License
|
254 |
<div align="center">
|
255 |
<h2> License </h2>
|
256 |
</div>
|
|
|
276 |
<div align="center">
|
277 |
<h2> Citation </h2>
|
278 |
</div>
|
|
|
279 |
```
|
280 |
@article{wang2023openchat,
|
281 |
title={OpenChat: Advancing Open-source Language Models with Mixed-Quality Data},
|