Spaces:
Running
Running
eq bench
Browse files- benchmark_results.csv +0 -10
benchmark_results.csv
CHANGED
@@ -3,8 +3,6 @@ Bielik_v0.1,2024-06-18 12:48:51,,speakleash/Bielik-7B-Instruct-v0.1,,,47.1,eq-be
|
|
3 |
Bielik_v0.1,2024-06-18 13:44:54,,speakleash/Bielik-7B-Instruct-v0.1,,,34.17,eq-bench_v2_pl,149.0,1,transformers, ,,
|
4 |
Bielik_v0.1,2024-06-18 14:01:46,,speakleash/Bielik-7B-Instruct-v0.1,,,34.27,eq-bench_v2_pl,156.0,1,transformers, ,,
|
5 |
openchat-gemma,2024-06-18 14:03:04,,openchat/openchat-3.5-0106-gemma,,,FAILED,eq-bench,FAILED,1,transformers, ,,System role not supported
|
6 |
-
Bielik_v0.2,2024-06-18 14:10:38,,../models/gwint2,,,69.93,eq-bench_v2_pl,171.0,1,transformers, ,,
|
7 |
-
Bielik_v0.2,2024-06-18 14:23:48,,../models/gwint2,,,72.37,eq-bench_v2,171.0,1,transformers, ,,
|
8 |
openchat-35-0106,2024-06-18 14:30:24,,openchat/openchat-3.5-0106,,,45.69,eq-bench_v2_pl,170.0,1,transformers, ,,
|
9 |
openchat-35-0106,2024-06-18 15:15:03,,openchat/openchat-3.5-0106,,,45.69,eq-bench_v2_pl,170.0,1,transformers, ,,
|
10 |
glm-4-9b-chat,2024-06-18 15:16:14,,THUDM/glm-4-9b-chat,,,FAILED,eq-bench,FAILED,1,transformers, ,,
|
@@ -36,14 +34,6 @@ openchat-gemma,2024-06-19 10:19:44,,openchat/openchat-3.5-0106-gemma,,,59.93,eq-
|
|
36 |
Nous-Hermes-2-SOLAR-10.7B,2024-06-19 10:27:36,,NousResearch/Nous-Hermes-2-SOLAR-10.7B,,,48.22,eq-bench_v2_pl,169.0,1,transformers, ,,
|
37 |
SOLAR-10.7B-Instruct-v1.0,2024-06-19 10:43:47,,upstage/SOLAR-10.7B-Instruct-v1.0,,,57.57,eq-bench_v2_pl,164.0,1,transformers, ,,
|
38 |
Qwen2-7B-Instruct,2024-06-19 10:46:52,,Qwen/Qwen2-7B-Instruct,,,53.08,eq-bench_v2_pl,171.0,1,transformers, ,,
|
39 |
-
models/gwint1/hf,2024-06-19 10:55:32,,models/gwint1/hf,,,FAILED,eq-bench,FAILED,1,transformers, ,,Incorrect path_or_model_id: 'models/gwint1/hf'. Please provide either the path to a local folder or the repo_id of a model on the Hub.
|
40 |
-
models/gwint2,2024-06-19 10:55:32,,models/gwint2,,,FAILED,eq-bench,FAILED,1,transformers, ,,models/gwint2 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models' If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
|
41 |
-
../models/gwint1/hf,2024-06-19 10:56:07,,models/gwint1/hf,,,FAILED,eq-bench,FAILED,1,transformers, ,,Incorrect path_or_model_id: 'models/gwint1/hf'. Please provide either the path to a local folder or the repo_id of a model on the Hub.
|
42 |
-
../models/gwint2,2024-06-19 10:56:07,,models/gwint2,,,FAILED,eq-bench,FAILED,1,transformers, ,,models/gwint2 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models' If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
|
43 |
-
../models/gwint1/hf,2024-06-19 11:04:28,,models/gwint1/hf,,,FAILED,eq-bench,FAILED,1,transformers, ,,Incorrect path_or_model_id: 'models/gwint1/hf'. Please provide either the path to a local folder or the repo_id of a model on the Hub.
|
44 |
-
../models/gwint2,2024-06-19 11:04:29,,models/gwint2,,,FAILED,eq-bench,FAILED,1,transformers, ,,models/gwint2 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models' If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
|
45 |
-
models/gwint1/hf,2024-06-19 11:15:13,,../models/gwint1/hf,,,37.88,eq-bench_v2_pl,169.0,1,transformers, ,,
|
46 |
-
models/gwint2,2024-06-19 11:21:15,,../models/gwint2,,,68.24,eq-bench_v2_pl,171.0,1,transformers, ,,
|
47 |
Azurro/APT3-275M-Base,2024-06-19 11:36:43,,Azurro/APT3-275M-Base,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
|
48 |
Qwen/Qwen2-0.5B,2024-06-19 11:47:44,,Qwen/Qwen2-0.5B,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,18.0 questions were parseable (min is 83%)
|
49 |
Qwen/Qwen2-0.5B-Instruct,2024-06-19 11:51:21,,Qwen/Qwen2-0.5B-Instruct,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,125.0 questions were parseable (min is 83%)
|
|
|
3 |
Bielik_v0.1,2024-06-18 13:44:54,,speakleash/Bielik-7B-Instruct-v0.1,,,34.17,eq-bench_v2_pl,149.0,1,transformers, ,,
|
4 |
Bielik_v0.1,2024-06-18 14:01:46,,speakleash/Bielik-7B-Instruct-v0.1,,,34.27,eq-bench_v2_pl,156.0,1,transformers, ,,
|
5 |
openchat-gemma,2024-06-18 14:03:04,,openchat/openchat-3.5-0106-gemma,,,FAILED,eq-bench,FAILED,1,transformers, ,,System role not supported
|
|
|
|
|
6 |
openchat-35-0106,2024-06-18 14:30:24,,openchat/openchat-3.5-0106,,,45.69,eq-bench_v2_pl,170.0,1,transformers, ,,
|
7 |
openchat-35-0106,2024-06-18 15:15:03,,openchat/openchat-3.5-0106,,,45.69,eq-bench_v2_pl,170.0,1,transformers, ,,
|
8 |
glm-4-9b-chat,2024-06-18 15:16:14,,THUDM/glm-4-9b-chat,,,FAILED,eq-bench,FAILED,1,transformers, ,,
|
|
|
34 |
Nous-Hermes-2-SOLAR-10.7B,2024-06-19 10:27:36,,NousResearch/Nous-Hermes-2-SOLAR-10.7B,,,48.22,eq-bench_v2_pl,169.0,1,transformers, ,,
|
35 |
SOLAR-10.7B-Instruct-v1.0,2024-06-19 10:43:47,,upstage/SOLAR-10.7B-Instruct-v1.0,,,57.57,eq-bench_v2_pl,164.0,1,transformers, ,,
|
36 |
Qwen2-7B-Instruct,2024-06-19 10:46:52,,Qwen/Qwen2-7B-Instruct,,,53.08,eq-bench_v2_pl,171.0,1,transformers, ,,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
Azurro/APT3-275M-Base,2024-06-19 11:36:43,,Azurro/APT3-275M-Base,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
|
38 |
Qwen/Qwen2-0.5B,2024-06-19 11:47:44,,Qwen/Qwen2-0.5B,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,18.0 questions were parseable (min is 83%)
|
39 |
Qwen/Qwen2-0.5B-Instruct,2024-06-19 11:51:21,,Qwen/Qwen2-0.5B-Instruct,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,125.0 questions were parseable (min is 83%)
|