Spaces:
Running
Running
davidkim205
commited on
Commit
•
731e515
1
Parent(s):
d05d8fb
add claude-3-5-sonnet-20240620
Browse files- ko_bench.csv +4 -0
ko_bench.csv
CHANGED
@@ -2,6 +2,7 @@ judge_model,turn,model,score,Coding,Extraction,Humanities,Math,Reasoning,Rolepla
|
|
2 |
gpt-4o,1,GPT-4o-2024-05-13,9.4,8.7,9.6,9.6,9.9,9.0,9.2,9.7,9.3
|
3 |
gpt-4o,1,gpt-4-0125-preview,8.9,7.7,9.8,9.1,9.7,7.8,9.2,8.7,9.4
|
4 |
gpt-4o,1,GPT-4o-mini-2024-07-18,8.8,7.3,9.2,9.4,10.0,6.9,8.7,9.6,9.1
|
|
|
5 |
gpt-4o,1,Mistral-Large-Instruct-2407,8.5,6.8,8.9,8.7,9.6,6.6,8.5,9.2,9.5
|
6 |
gpt-4o,1,Qwen2-72B-Instruct,8.3,5.1,9.7,8.9,7.5,7.9,8.8,9.2,9.3
|
7 |
gpt-4o,1,gemma-2-27b-it,8.3,6.8,9.4,9.5,7.9,5.4,9.0,9.0,9.2
|
@@ -26,6 +27,7 @@ gpt-4o,2,GPT-4o-mini-2024-07-18,7.6,6.2,7.6,9.1,7.8,4.6,8.2,9.0,8.3
|
|
26 |
gpt-4o,2,Mistral-Large-Instruct-2407,7.2,6.5,8.8,7.5,7.9,4.7,7.3,7.2,7.6
|
27 |
gpt-4o,2,gemma-2-27b-it,7.0,6.4,7.6,9.0,5.4,5.1,7.9,7.4,7.4
|
28 |
gpt-4o,2,gemini-1.5-pro,7.0,6.3,7.7,8.3,6.1,5.0,8.5,7.8,6.5
|
|
|
29 |
gpt-4o,2,Qwen2-72B-Instruct,6.9,5.5,8.4,8.7,5.3,4.4,7.9,7.4,7.6
|
30 |
gpt-4o,2,ko-gemma-2-9b-it,6.4,5.7,6.9,8.5,5.6,4.3,7.3,6.6,6.5
|
31 |
gpt-4o,2,WizardLM-2-8x22B,6.4,6.0,8.2,7.2,6.1,4.1,7.0,6.8,5.5
|
@@ -44,6 +46,7 @@ gpt-4o,2,WizardLM-13B-V1.2,3.0,2.6,3.5,3.6,1.8,2.3,3.7,3.3,2.8
|
|
44 |
keval,1,GPT-4o-2024-05-13,9.1,7.8,9.5,9.6,9.9,8.8,8.7,9.3,9.2
|
45 |
keval,1,gpt-4-0125-preview,8.8,7.7,9.6,9.2,9.8,7.5,8.2,9.5,9.2
|
46 |
keval,1,GPT-4o-mini-2024-07-18,8.7,7.8,8.2,9.3,10.0,6.9,8.8,9.7,9.2
|
|
|
47 |
keval,1,Mistral-Large-Instruct-2407,8.2,6.3,7.9,8.9,9.6,6.4,8.2,9.5,9.2
|
48 |
keval,1,gemini-1.5-pro,8.2,5.7,9.8,8.8,7.4,6.2,9.1,9.7,9.0
|
49 |
keval,1,gemma-2-27b-it,8.1,5.9,9.3,9.4,7.4,5.7,8.9,9.0,9.0
|
@@ -68,6 +71,7 @@ keval,2,GPT-4o-mini-2024-07-18,7.4,6.8,7.6,8.7,7.7,4.3,7.8,8.4,7.8
|
|
68 |
keval,2,Mistral-Large-Instruct-2407,7.0,5.4,7.3,8.5,7.3,5.2,7.9,7.8,6.9
|
69 |
keval,2,Qwen2-72B-Instruct,7.0,6.2,7.5,8.7,5.5,5.3,7.5,6.9,8.1
|
70 |
keval,2,gemma-2-27b-it,6.9,6.6,7.0,8.9,5.5,5.0,7.6,6.9,7.3
|
|
|
71 |
keval,2,WizardLM-2-8x22B,6.6,5.6,7.6,7.9,6.3,4.9,6.9,7.4,6.3
|
72 |
keval,2,gemini-1.5-pro,6.5,5.2,6.9,8.4,6.0,4.8,8.1,7.3,5.4
|
73 |
keval,2,ko-gemma-2-9b-it,6.4,5.1,6.6,8.9,6.0,4.0,7.2,6.8,6.7
|
|
|
2 |
gpt-4o,1,GPT-4o-2024-05-13,9.4,8.7,9.6,9.6,9.9,9.0,9.2,9.7,9.3
|
3 |
gpt-4o,1,gpt-4-0125-preview,8.9,7.7,9.8,9.1,9.7,7.8,9.2,8.7,9.4
|
4 |
gpt-4o,1,GPT-4o-mini-2024-07-18,8.8,7.3,9.2,9.4,10.0,6.9,8.7,9.6,9.1
|
5 |
+
gpt-4o,1,claude-3-5-sonnet-20240620,8.6,8.1,9.7,9.3,8.7,5.8,8.2,9.4,9.5
|
6 |
gpt-4o,1,Mistral-Large-Instruct-2407,8.5,6.8,8.9,8.7,9.6,6.6,8.5,9.2,9.5
|
7 |
gpt-4o,1,Qwen2-72B-Instruct,8.3,5.1,9.7,8.9,7.5,7.9,8.8,9.2,9.3
|
8 |
gpt-4o,1,gemma-2-27b-it,8.3,6.8,9.4,9.5,7.9,5.4,9.0,9.0,9.2
|
|
|
27 |
gpt-4o,2,Mistral-Large-Instruct-2407,7.2,6.5,8.8,7.5,7.9,4.7,7.3,7.2,7.6
|
28 |
gpt-4o,2,gemma-2-27b-it,7.0,6.4,7.6,9.0,5.4,5.1,7.9,7.4,7.4
|
29 |
gpt-4o,2,gemini-1.5-pro,7.0,6.3,7.7,8.3,6.1,5.0,8.5,7.8,6.5
|
30 |
+
gpt-4o,2,claude-3-5-sonnet-20240620,6.9,6.0,9.0,7.3,6.2,5.8,7.3,6.5,7.5
|
31 |
gpt-4o,2,Qwen2-72B-Instruct,6.9,5.5,8.4,8.7,5.3,4.4,7.9,7.4,7.6
|
32 |
gpt-4o,2,ko-gemma-2-9b-it,6.4,5.7,6.9,8.5,5.6,4.3,7.3,6.6,6.5
|
33 |
gpt-4o,2,WizardLM-2-8x22B,6.4,6.0,8.2,7.2,6.1,4.1,7.0,6.8,5.5
|
|
|
46 |
keval,1,GPT-4o-2024-05-13,9.1,7.8,9.5,9.6,9.9,8.8,8.7,9.3,9.2
|
47 |
keval,1,gpt-4-0125-preview,8.8,7.7,9.6,9.2,9.8,7.5,8.2,9.5,9.2
|
48 |
keval,1,GPT-4o-mini-2024-07-18,8.7,7.8,8.2,9.3,10.0,6.9,8.8,9.7,9.2
|
49 |
+
keval,1,claude-3-5-sonnet-20240620,8.4,8.1,9.8,8.7,8.3,5.8,7.9,9.2,9.0
|
50 |
keval,1,Mistral-Large-Instruct-2407,8.2,6.3,7.9,8.9,9.6,6.4,8.2,9.5,9.2
|
51 |
keval,1,gemini-1.5-pro,8.2,5.7,9.8,8.8,7.4,6.2,9.1,9.7,9.0
|
52 |
keval,1,gemma-2-27b-it,8.1,5.9,9.3,9.4,7.4,5.7,8.9,9.0,9.0
|
|
|
71 |
keval,2,Mistral-Large-Instruct-2407,7.0,5.4,7.3,8.5,7.3,5.2,7.9,7.8,6.9
|
72 |
keval,2,Qwen2-72B-Instruct,7.0,6.2,7.5,8.7,5.5,5.3,7.5,6.9,8.1
|
73 |
keval,2,gemma-2-27b-it,6.9,6.6,7.0,8.9,5.5,5.0,7.6,6.9,7.3
|
74 |
+
keval,2,claude-3-5-sonnet-20240620,6.8,6.2,8.4,7.8,5.4,5.1,7.0,7.3,7.5
|
75 |
keval,2,WizardLM-2-8x22B,6.6,5.6,7.6,7.9,6.3,4.9,6.9,7.4,6.3
|
76 |
keval,2,gemini-1.5-pro,6.5,5.2,6.9,8.4,6.0,4.8,8.1,7.3,5.4
|
77 |
keval,2,ko-gemma-2-9b-it,6.4,5.1,6.6,8.9,6.0,4.0,7.2,6.8,6.7
|