djstrong committed on
Commit
277ca2e
•
1 Parent(s): c645de7
Files changed (1) hide show
  1. benchmark_results.csv +0 -10
benchmark_results.csv CHANGED
@@ -3,8 +3,6 @@ Bielik_v0.1,2024-06-18 12:48:51,,speakleash/Bielik-7B-Instruct-v0.1,,,47.1,eq-be
3
  Bielik_v0.1,2024-06-18 13:44:54,,speakleash/Bielik-7B-Instruct-v0.1,,,34.17,eq-bench_v2_pl,149.0,1,transformers, ,,
4
  Bielik_v0.1,2024-06-18 14:01:46,,speakleash/Bielik-7B-Instruct-v0.1,,,34.27,eq-bench_v2_pl,156.0,1,transformers, ,,
5
  openchat-gemma,2024-06-18 14:03:04,,openchat/openchat-3.5-0106-gemma,,,FAILED,eq-bench,FAILED,1,transformers, ,,System role not supported
6
- Bielik_v0.2,2024-06-18 14:10:38,,../models/gwint2,,,69.93,eq-bench_v2_pl,171.0,1,transformers, ,,
7
- Bielik_v0.2,2024-06-18 14:23:48,,../models/gwint2,,,72.37,eq-bench_v2,171.0,1,transformers, ,,
8
  openchat-35-0106,2024-06-18 14:30:24,,openchat/openchat-3.5-0106,,,45.69,eq-bench_v2_pl,170.0,1,transformers, ,,
9
  openchat-35-0106,2024-06-18 15:15:03,,openchat/openchat-3.5-0106,,,45.69,eq-bench_v2_pl,170.0,1,transformers, ,,
10
  glm-4-9b-chat,2024-06-18 15:16:14,,THUDM/glm-4-9b-chat,,,FAILED,eq-bench,FAILED,1,transformers, ,,
@@ -36,14 +34,6 @@ openchat-gemma,2024-06-19 10:19:44,,openchat/openchat-3.5-0106-gemma,,,59.93,eq-
36
  Nous-Hermes-2-SOLAR-10.7B,2024-06-19 10:27:36,,NousResearch/Nous-Hermes-2-SOLAR-10.7B,,,48.22,eq-bench_v2_pl,169.0,1,transformers, ,,
37
  SOLAR-10.7B-Instruct-v1.0,2024-06-19 10:43:47,,upstage/SOLAR-10.7B-Instruct-v1.0,,,57.57,eq-bench_v2_pl,164.0,1,transformers, ,,
38
  Qwen2-7B-Instruct,2024-06-19 10:46:52,,Qwen/Qwen2-7B-Instruct,,,53.08,eq-bench_v2_pl,171.0,1,transformers, ,,
39
- models/gwint1/hf,2024-06-19 10:55:32,,models/gwint1/hf,,,FAILED,eq-bench,FAILED,1,transformers, ,,Incorrect path_or_model_id: 'models/gwint1/hf'. Please provide either the path to a local folder or the repo_id of a model on the Hub.
40
- models/gwint2,2024-06-19 10:55:32,,models/gwint2,,,FAILED,eq-bench,FAILED,1,transformers, ,,models/gwint2 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models' If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
41
- ../models/gwint1/hf,2024-06-19 10:56:07,,models/gwint1/hf,,,FAILED,eq-bench,FAILED,1,transformers, ,,Incorrect path_or_model_id: 'models/gwint1/hf'. Please provide either the path to a local folder or the repo_id of a model on the Hub.
42
- ../models/gwint2,2024-06-19 10:56:07,,models/gwint2,,,FAILED,eq-bench,FAILED,1,transformers, ,,models/gwint2 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models' If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
43
- ../models/gwint1/hf,2024-06-19 11:04:28,,models/gwint1/hf,,,FAILED,eq-bench,FAILED,1,transformers, ,,Incorrect path_or_model_id: 'models/gwint1/hf'. Please provide either the path to a local folder or the repo_id of a model on the Hub.
44
- ../models/gwint2,2024-06-19 11:04:29,,models/gwint2,,,FAILED,eq-bench,FAILED,1,transformers, ,,models/gwint2 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models' If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
45
- models/gwint1/hf,2024-06-19 11:15:13,,../models/gwint1/hf,,,37.88,eq-bench_v2_pl,169.0,1,transformers, ,,
46
- models/gwint2,2024-06-19 11:21:15,,../models/gwint2,,,68.24,eq-bench_v2_pl,171.0,1,transformers, ,,
47
  Azurro/APT3-275M-Base,2024-06-19 11:36:43,,Azurro/APT3-275M-Base,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
48
  Qwen/Qwen2-0.5B,2024-06-19 11:47:44,,Qwen/Qwen2-0.5B,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,18.0 questions were parseable (min is 83%)
49
  Qwen/Qwen2-0.5B-Instruct,2024-06-19 11:51:21,,Qwen/Qwen2-0.5B-Instruct,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,125.0 questions were parseable (min is 83%)
 
3
  Bielik_v0.1,2024-06-18 13:44:54,,speakleash/Bielik-7B-Instruct-v0.1,,,34.17,eq-bench_v2_pl,149.0,1,transformers, ,,
4
  Bielik_v0.1,2024-06-18 14:01:46,,speakleash/Bielik-7B-Instruct-v0.1,,,34.27,eq-bench_v2_pl,156.0,1,transformers, ,,
5
  openchat-gemma,2024-06-18 14:03:04,,openchat/openchat-3.5-0106-gemma,,,FAILED,eq-bench,FAILED,1,transformers, ,,System role not supported
 
 
6
  openchat-35-0106,2024-06-18 14:30:24,,openchat/openchat-3.5-0106,,,45.69,eq-bench_v2_pl,170.0,1,transformers, ,,
7
  openchat-35-0106,2024-06-18 15:15:03,,openchat/openchat-3.5-0106,,,45.69,eq-bench_v2_pl,170.0,1,transformers, ,,
8
  glm-4-9b-chat,2024-06-18 15:16:14,,THUDM/glm-4-9b-chat,,,FAILED,eq-bench,FAILED,1,transformers, ,,
 
34
  Nous-Hermes-2-SOLAR-10.7B,2024-06-19 10:27:36,,NousResearch/Nous-Hermes-2-SOLAR-10.7B,,,48.22,eq-bench_v2_pl,169.0,1,transformers, ,,
35
  SOLAR-10.7B-Instruct-v1.0,2024-06-19 10:43:47,,upstage/SOLAR-10.7B-Instruct-v1.0,,,57.57,eq-bench_v2_pl,164.0,1,transformers, ,,
36
  Qwen2-7B-Instruct,2024-06-19 10:46:52,,Qwen/Qwen2-7B-Instruct,,,53.08,eq-bench_v2_pl,171.0,1,transformers, ,,
 
 
 
 
 
 
 
 
37
  Azurro/APT3-275M-Base,2024-06-19 11:36:43,,Azurro/APT3-275M-Base,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
38
  Qwen/Qwen2-0.5B,2024-06-19 11:47:44,,Qwen/Qwen2-0.5B,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,18.0 questions were parseable (min is 83%)
39
  Qwen/Qwen2-0.5B-Instruct,2024-06-19 11:51:21,,Qwen/Qwen2-0.5B-Instruct,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,125.0 questions were parseable (min is 83%)