Run ID, Benchmark Completed, Prompt Format, Model Path, Lora Path, Quantization, Benchmark Score, Benchmark Version, Num Questions Parseable, Num Iterations, Inference Engine, Ooba Params, Download Filters, Error
Bielik_v0.1,2024-06-18 12:48:51,,speakleash/Bielik-7B-Instruct-v0.1,,,47.1,eq-bench_v2,170.0,1,transformers, ,,
Bielik_v0.1,2024-06-18 13:44:54,,speakleash/Bielik-7B-Instruct-v0.1,,,34.17,eq-bench_v2_pl,149.0,1,transformers, ,,
Bielik_v0.1,2024-06-18 14:01:46,,speakleash/Bielik-7B-Instruct-v0.1,,,34.27,eq-bench_v2_pl,156.0,1,transformers, ,,
openchat-gemma,2024-06-18 14:03:04,,openchat/openchat-3.5-0106-gemma,,,FAILED,eq-bench,FAILED,1,transformers, ,,System role not supported
openchat-35-0106,2024-06-18 14:30:24,,openchat/openchat-3.5-0106,,,45.69,eq-bench_v2_pl,170.0,1,transformers, ,,
openchat-35-0106,2024-06-18 15:15:03,,openchat/openchat-3.5-0106,,,45.69,eq-bench_v2_pl,170.0,1,transformers, ,,
glm-4-9b-chat,2024-06-18 15:16:14,,THUDM/glm-4-9b-chat,,,FAILED,eq-bench,FAILED,1,transformers, ,,
openchat-35-0106,2024-06-18 15:19:01,,openchat/openchat-3.5-0106,,,72.92,eq-bench_v2,171.0,1,transformers, ,,
glm-4-9b-chat,2024-06-18 15:20:10,,THUDM/glm-4-9b-chat,,,FAILED,eq-bench,FAILED,1,transformers, ,,
openchat-35-0106,2024-06-18 15:22:41,,openchat/openchat-3.5-0106,,,45.69,eq-bench_v2_pl,170.0,1,transformers, ,,
glm-4-9b-chat,2024-06-18 15:23:50,,THUDM/glm-4-9b-chat,,,FAILED,eq-bench,FAILED,1,transformers, ,,
glm-4-9b-chat,2024-06-18 15:26:30,,THUDM/glm-4-9b-chat,,,FAILED,eq-bench,FAILED,1,transformers, ,,
glm-4-9b-chat,2024-06-18 16:30:21,,THUDM/glm-4-9b-chat,,,FAILED,eq-bench,FAILED,1,transformers, ,,
glm-4-9b-chat-1m,2024-06-18 16:54:28,,THUDM/glm-4-9b-chat-1m,,,FAILED,eq-bench,FAILED,1,transformers, ,,
glm-4-9b-chat-1m,2024-06-18 17:05:16,,THUDM/glm-4-9b-chat-1m,,,FAILED,eq-bench,FAILED,1,transformers, ,,
openchat-3.6-8b-20240522,2024-06-18 17:12:00,,openchat/openchat-3.6-8b-20240522,,,-1.339640900815702e+23,eq-bench_v2,171.0,1,transformers, ,,
openchat-gemma,2024-06-18 17:13:12,,openchat/openchat-3.5-0106-gemma,,,FAILED,eq-bench,FAILED,1,transformers, ,,System role not supported
Meta-Llama-3-8B-Instruct,2024-06-18 21:29:03,,meta-llama/Meta-Llama-3-8B-Instruct,,,69.09,eq-bench_v2,171.0,1,transformers, ,,
Starling-LM-7B-alpha,2024-06-18 21:45:18,,berkeley-nest/Starling-LM-7B-alpha,,,49.63,eq-bench_v2_pl,171.0,1,transformers, ,,
Starling-LM-7B-beta,2024-06-18 21:51:54,,Nexusflow/Starling-LM-7B-beta,,,44.91,eq-bench_v2_pl,159.0,1,transformers, ,,
Mistral-7B-Instruct-v0.2,2024-06-18 21:52:17,,mistralai/Mistral-7B-Instruct-v0.2,,,FAILED,eq-bench,FAILED,1,transformers, ,,Conversation roles must alternate user/assistant/user/assistant/...
Mistral-7B-Instruct-v0.1,2024-06-18 22:26:07,,mistralai/Mistral-7B-Instruct-v0.1,,,FAILED,eq-bench,FAILED,1,transformers, ,,Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument index in method wrapper_CUDA__index_select)
Meta-Llama-3-8B-Instruct,2024-06-18 22:35:53,,meta-llama/Meta-Llama-3-8B-Instruct,,,46.53,eq-bench_v2_pl,171.0,1,transformers, ,,
openchat-gemma,2024-06-19 09:30:28,,openchat/openchat-3.5-0106-gemma,,,FAILED,eq-bench,FAILED,1,transformers, ,,System role not supported
Mistral-7B-Instruct-v0.2,2024-06-19 09:30:46,,mistralai/Mistral-7B-Instruct-v0.2,,,FAILED,eq-bench,FAILED,1,transformers, ,,Conversation roles must alternate user/assistant/user/assistant/...
openchat-gemma,2024-06-19 09:35:50,,openchat/openchat-3.5-0106-gemma,,,FAILED,eq-bench,FAILED,1,transformers, ,,System role not supported
Mistral-7B-Instruct-v0.2,2024-06-19 09:36:01,,mistralai/Mistral-7B-Instruct-v0.2,,,FAILED,eq-bench,FAILED,1,transformers, ,,Conversation roles must alternate user/assistant/user/assistant/...
openchat-gemma,2024-06-19 09:43:53,,openchat/openchat-3.5-0106-gemma,,,60.11,eq-bench_v2_pl,169.0,1,transformers, ,,
Mistral-7B-Instruct-v0.2,2024-06-19 09:49:42,,mistralai/Mistral-7B-Instruct-v0.2,,,52.99,eq-bench_v2_pl,148.0,1,transformers, ,,
openchat-gemma,2024-06-19 09:54:01,,openchat/openchat-3.5-0106-gemma,,,60.11,eq-bench_v2_pl,169.0,1,transformers, ,,
openchat-gemma,2024-06-19 10:16:52,,openchat/openchat-3.5-0106-gemma,,,59.93,eq-bench_v2_pl,170.0,1,transformers, ,,
openchat-gemma,2024-06-19 10:19:44,,openchat/openchat-3.5-0106-gemma,,,59.93,eq-bench_v2_pl,170.0,1,transformers, ,,
Nous-Hermes-2-SOLAR-10.7B,2024-06-19 10:27:36,,NousResearch/Nous-Hermes-2-SOLAR-10.7B,,,48.22,eq-bench_v2_pl,169.0,1,transformers, ,,
SOLAR-10.7B-Instruct-v1.0,2024-06-19 10:43:47,,upstage/SOLAR-10.7B-Instruct-v1.0,,,57.57,eq-bench_v2_pl,164.0,1,transformers, ,,
Qwen2-7B-Instruct,2024-06-19 10:46:52,,Qwen/Qwen2-7B-Instruct,,,53.08,eq-bench_v2_pl,171.0,1,transformers, ,,
models/gwint2,2024-06-19 11:21:15,,speakleash/Bielik-11B-v2.0-Instruct,,,68.24,eq-bench_v2_pl,171.0,1,transformers, ,,
Azurro/APT3-275M-Base,2024-06-19 11:36:43,,Azurro/APT3-275M-Base,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
Qwen/Qwen2-0.5B,2024-06-19 11:47:44,,Qwen/Qwen2-0.5B,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,18.0 questions were parseable (min is 83%)
Qwen/Qwen2-0.5B-Instruct,2024-06-19 11:51:21,,Qwen/Qwen2-0.5B-Instruct,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,125.0 questions were parseable (min is 83%)
allegro/plt5-large,2024-06-19 11:51:22,,allegro/plt5-large,,,FAILED,eq-bench,FAILED,1,transformers, ,,Unrecognized configuration class <class 'transformers.models.t5.configuration_t5.T5Config'> for this kind of AutoModel: AutoModelForCausalLM. Model type should be one of BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, CpmAntConfig, CTRLConfig, Data2VecTextConfig, DbrxConfig, ElectraConfig, ErnieConfig, FalconConfig, FuyuConfig, GemmaConfig, GitConfig, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GPTJConfig, JambaConfig, JetMoeConfig, LlamaConfig, MambaConfig, MarianConfig, MBartConfig, MegaConfig, MegatronBertConfig, MistralConfig, MixtralConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, OlmoConfig, OpenLlamaConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, PLBartConfig, ProphetNetConfig, QDQBertConfig, Qwen2Config, Qwen2MoeConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, Speech2Text2Config, StableLmConfig, Starcoder2Config, TransfoXLConfig, TrOCRConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMProphetNetConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, XmodConfig.
APT3-1B-Instruct-e1,2024-06-19 11:51:22,,APT3-1B-Instruct-e1,,,FAILED,eq-bench,FAILED,1,transformers, ,,APT3-1B-Instruct-e1 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models' If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
APT3-1B-Instruct-e2,2024-06-19 11:51:23,,APT3-1B-Instruct-e2,,,FAILED,eq-bench,FAILED,1,transformers, ,,APT3-1B-Instruct-e2 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models' If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
Azurro/APT3-1B-Base,2024-06-19 12:00:40,,Azurro/APT3-1B-Base,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
OPI-PG/Qra-1b,2024-06-19 12:13:15,,OPI-PG/Qra-1b,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
TinyLlama/TinyLlama-1.1B-Chat-v1.0,2024-06-19 12:23:45,,TinyLlama/TinyLlama-1.1B-Chat-v1.0,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,36.0 questions were parseable (min is 83%)
Qwen/Qwen2-1.5B,2024-06-19 12:35:37,,Qwen/Qwen2-1.5B,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,54.0 questions were parseable (min is 83%)
Qwen/Qwen2-1.5B-Instruct,2024-06-19 12:38:29,,Qwen/Qwen2-1.5B-Instruct,,,15.33,eq-bench_v2_pl,165.0,1,transformers, ,,
sdadas/polish-gpt2-xl,2024-06-19 12:54:39,,sdadas/polish-gpt2-xl,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
internlm/internlm2-1_8b,2024-06-19 13:08:50,,internlm/internlm2-1_8b,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
internlm/internlm2-chat-1_8b,2024-06-19 13:13:21,,internlm/internlm2-chat-1_8b,,,13.83,eq-bench_v2_pl,150.0,1,transformers, ,,
google/gemma-1.1-2b-it,2024-06-19 13:15:24,,google/gemma-1.1-2b-it,,,16.47,eq-bench_v2_pl,171.0,1,transformers, ,,
microsoft/phi-2,2024-06-19 13:28:07,,microsoft/phi-2,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
google/mt5-xl,2024-06-19 13:28:10,,google/mt5-xl,,,FAILED,eq-bench,FAILED,1,transformers, ,,Unrecognized configuration class <class 'transformers.models.mt5.configuration_mt5.MT5Config'> for this kind of AutoModel: AutoModelForCausalLM. Model type should be one of BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, CpmAntConfig, CTRLConfig, Data2VecTextConfig, DbrxConfig, ElectraConfig, ErnieConfig, FalconConfig, FuyuConfig, GemmaConfig, GitConfig, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GPTJConfig, JambaConfig, JetMoeConfig, LlamaConfig, MambaConfig, MarianConfig, MBartConfig, MegaConfig, MegatronBertConfig, MistralConfig, MixtralConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, OlmoConfig, OpenLlamaConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, PLBartConfig, ProphetNetConfig, QDQBertConfig, Qwen2Config, Qwen2MoeConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, Speech2Text2Config, StableLmConfig, Starcoder2Config, TransfoXLConfig, TrOCRConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMProphetNetConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, XmodConfig, InternLM2Config, InternLM2Config.
microsoft/Phi-3-mini-4k-instruct,2024-06-19 13:34:56,,microsoft/Phi-3-mini-4k-instruct,,,28.05,eq-bench_v2_pl,159.0,1,transformers, ,,
ssmits/Falcon2-5.5B-Polish,2024-06-19 13:47:21,,ssmits/Falcon2-5.5B-Polish,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
01-ai/Yi-1.5-6B,2024-06-19 14:04:20,,01-ai/Yi-1.5-6B,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,1.0 questions were parseable (min is 83%)
01-ai/Yi-1.5-6B-Chat,2024-06-19 14:11:22,,01-ai/Yi-1.5-6B-Chat,,,5.19,eq-bench_v2_pl,161.0,1,transformers, ,,
THUDM/chatglm3-6b,2024-06-19 14:12:11,,THUDM/chatglm3-6b,,,FAILED,eq-bench,FAILED,1,transformers, ,,too many values to unpack (expected 2)
THUDM/chatglm3-6b-base,2024-06-19 14:13:00,,THUDM/chatglm3-6b-base,,,FAILED,eq-bench,FAILED,1,transformers, ,,too many values to unpack (expected 2)
alpindale/Mistral-7B-v0.2-hf,2024-06-19 14:16:37,,alpindale/Mistral-7B-v0.2-hf,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,45.0 questions were parseable (min is 83%)
berkeley-nest/Starling-LM-7B-alpha,2024-06-19 14:22:32,,berkeley-nest/Starling-LM-7B-alpha,,,46.26,eq-bench_v2_pl,171.0,1,transformers, ,,
google/gemma-7b,2024-06-19 14:38:02,,google/gemma-7b,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
google/gemma-7b-it,2024-06-19 14:53:28,,google/gemma-7b-it,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
HuggingFaceH4/zephyr-7b-alpha,2024-06-19 15:05:31,,HuggingFaceH4/zephyr-7b-alpha,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,99.0 questions were parseable (min is 83%)
HuggingFaceH4/zephyr-7b-beta,2024-06-19 15:18:24,,HuggingFaceH4/zephyr-7b-beta,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,88.0 questions were parseable (min is 83%)
internlm/internlm2-7b,2024-06-19 15:36:06,,internlm/internlm2-7b,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,43.0 questions were parseable (min is 83%)
internlm/internlm2-base-7b,2024-06-19 15:54:53,,internlm/internlm2-base-7b,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,6.0 questions were parseable (min is 83%)
internlm/internlm2-chat-7b,2024-06-19 16:02:07,,internlm/internlm2-chat-7b,,,40.0,eq-bench_v2_pl,169.0,1,transformers, ,,
internlm/internlm2-chat-7b-sft,2024-06-19 16:07:04,,internlm/internlm2-chat-7b-sft,,,41.62,eq-bench_v2_pl,170.0,1,transformers, ,,
lex-hue/Delexa-7b,2024-06-19 16:12:19,,lex-hue/Delexa-7b,,,49.03,eq-bench_v2_pl,169.0,1,transformers, ,,
meta-llama/Llama-2-7b-chat-hf,2024-06-19 16:21:08,,meta-llama/Llama-2-7b-chat-hf,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,116.0 questions were parseable (min is 83%)
meta-llama/Llama-2-7b-hf,2024-06-19 16:36:41,,meta-llama/Llama-2-7b-hf,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,1.0 questions were parseable (min is 83%)
microsoft/WizardLM-2-7B,2024-06-19 16:44:22,,microsoft/WizardLM-2-7B,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,137.0 questions were parseable (min is 83%)
mistralai/Mistral-7B-Instruct-v0.1,2024-06-19 16:44:33,,mistralai/Mistral-7B-Instruct-v0.1,,,FAILED,eq-bench,FAILED,1,transformers, ,,Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument index in method wrapper_CUDA__index_select)
mistralai/Mistral-7B-Instruct-v0.2,2024-06-19 16:50:36,,mistralai/Mistral-7B-Instruct-v0.2,,,53.25,eq-bench_v2_pl,151.0,1,transformers, ,,
mistralai/Mistral-7B-Instruct-v0.3,2024-06-19 16:54:49,,mistralai/Mistral-7B-Instruct-v0.3,,,45.21,eq-bench_v2_pl,171.0,1,transformers, ,,
mistralai/Mistral-7B-v0.1,2024-06-19 16:59:50,,mistralai/Mistral-7B-v0.1,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,65.0 questions were parseable (min is 83%)
mistralai/Mistral-7B-v0.3,2024-06-19 17:16:38,,mistralai/Mistral-7B-v0.3,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,14.0 questions were parseable (min is 83%)
Nexusflow/Starling-LM-7B-beta,2024-06-19 17:23:18,,Nexusflow/Starling-LM-7B-beta,,,45.1,eq-bench_v2_pl,166.0,1,transformers, ,,
openchat/openchat-3.5-0106,2024-06-19 17:27:10,,openchat/openchat-3.5-0106,,,43.81,eq-bench_v2_pl,171.0,1,transformers, ,,
openchat/openchat-3.5-0106-gemma,2024-06-19 17:30:31,,openchat/openchat-3.5-0106-gemma,,,58.62,eq-bench_v2_pl,169.0,1,transformers, ,,
openchat/openchat-3.5-1210,2024-06-19 17:34:27,,openchat/openchat-3.5-1210,,,49.04,eq-bench_v2_pl,171.0,1,transformers, ,,
OPI-PG/Qra-7b,2024-06-19 17:50:28,,OPI-PG/Qra-7b,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
Qwen/Qwen1.5-7B,2024-06-19 17:57:53,,Qwen/Qwen1.5-7B,,,23.11,eq-bench_v2_pl,155.0,1,transformers, ,,
Qwen/Qwen1.5-7B-Chat,2024-06-19 18:03:34,,Qwen/Qwen1.5-7B-Chat,,,25.0,eq-bench_v2_pl,164.0,1,transformers, ,,
Qwen/Qwen2-7B,2024-06-19 18:09:23,,Qwen/Qwen2-7B,,,36.58,eq-bench_v2_pl,166.0,1,transformers, ,,
Qwen/Qwen2-7B-Instruct,2024-06-19 18:12:42,,Qwen/Qwen2-7B-Instruct,,,53.74,eq-bench_v2_pl,171.0,1,transformers, ,,
Remek/Kruk-7B-SP-001,2024-06-19 18:17:13,,Remek/Kruk-7B-SP-001,,,44.44,eq-bench_v2_pl,171.0,1,transformers, ,,
Remek/OpenChat-3.5-0106-PL-Omnibusv2,2024-06-19 18:17:24,,Remek/OpenChat-3.5-0106-PL-Omnibusv2,,,FAILED,eq-bench,FAILED,1,transformers, ,,'system_message' is undefined
Remek/OpenChat3.5-0106-Spichlerz-Bocian,2024-06-19 18:24:08,,Remek/OpenChat3.5-0106-Spichlerz-Bocian,,,44.13,eq-bench_v2_pl,166.0,1,transformers, ,,
Remek/OpenChat3.5-0106-Spichlerz-Inst-001,2024-06-19 18:28:48,,Remek/OpenChat3.5-0106-Spichlerz-Inst-001,,,41.6,eq-bench_v2_pl,171.0,1,transformers, ,,
RWKV/HF_v5-Eagle-7B,2024-06-19 19:16:27,,RWKV/HF_v5-Eagle-7B,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
RWKV/v5-Eagle-7B-HF,2024-06-19 20:04:12,,RWKV/v5-Eagle-7B-HF,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
speakleash/Bielik-7B-v0.1,2024-06-19 20:11:16,,speakleash/Bielik-7B-v0.1,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,139.0 questions were parseable (min is 83%)
szymonrucinski/Curie-7B-v1,2024-06-19 20:29:24,,szymonrucinski/Curie-7B-v1,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,1.0 questions were parseable (min is 83%)
teknium/OpenHermes-2.5-Mistral-7B,2024-06-19 20:34:12,,teknium/OpenHermes-2.5-Mistral-7B,,,37.48,eq-bench_v2_pl,171.0,1,transformers, ,,
Voicelab/trurl-2-7b,2024-06-19 20:39:26,,Voicelab/trurl-2-7b,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,141.0 questions were parseable (min is 83%)
microsoft/Phi-3-small-8k-instruct,2024-06-19 20:39:31,,microsoft/Phi-3-small-8k-instruct,,,FAILED,eq-bench,FAILED,1,transformers, ,,No module named 'pytest'
CohereForAI/aya-23-8B,2024-06-19 20:44:01,,CohereForAI/aya-23-8B,,,45.43,eq-bench_v2_pl,171.0,1,transformers, ,,
meta-llama/Meta-Llama-3-8B,2024-06-19 21:01:55,,meta-llama/Meta-Llama-3-8B,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
meta-llama/Meta-Llama-3-8B-Instruct,2024-06-19 21:06:08,,meta-llama/Meta-Llama-3-8B-Instruct,,,46.27,eq-bench_v2_pl,171.0,1,transformers, ,,
mlabonne/NeuralDaredevil-8B-abliterated,2024-06-19 21:13:31,,mlabonne/NeuralDaredevil-8B-abliterated,,,54.74,eq-bench_v2_pl,171.0,1,transformers, ,,
NousResearch/Hermes-2-Pro-Llama-3-8B,2024-06-19 21:18:18,,NousResearch/Hermes-2-Pro-Llama-3-8B,,,54.57,eq-bench_v2_pl,171.0,1,transformers, ,,
NousResearch/Hermes-2-Theta-Llama-3-8B,2024-06-19 21:25:22,,NousResearch/Hermes-2-Theta-Llama-3-8B,,,54.88,eq-bench_v2_pl,171.0,1,transformers, ,,
nvidia/Llama3-ChatQA-1.5-8B,2024-06-19 22:27:24,,nvidia/Llama3-ChatQA-1.5-8B,,,40.55,eq-bench_v2_pl,166.0,1,transformers, ,,
openchat/openchat-3.6-8b-20240522,2024-06-19 22:34:56,,openchat/openchat-3.6-8b-20240522,,,-2.0090659464796595e+18,eq-bench_v2_pl,170.0,1,transformers, ,,
Remek/Llama-3-8B-Omnibus-1-PL-v01-INSTRUCT,2024-06-19 22:39:46,,Remek/Llama-3-8B-Omnibus-1-PL-v01-INSTRUCT,,,26.63,eq-bench_v2_pl,171.0,1,transformers, ,,
01-ai/Yi-1.5-9B,2024-06-19 23:07:56,,01-ai/Yi-1.5-9B,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,1.0 questions were parseable (min is 83%)
01-ai/Yi-1.5-9B-Chat,2024-06-19 23:19:16,,01-ai/Yi-1.5-9B-Chat,,,48.78,eq-bench_v2_pl,163.0,1,transformers, ,,
google/recurrentgemma-9b-it,2024-06-19 23:28:19,,google/recurrentgemma-9b-it,,,52.82,eq-bench_v2_pl,171.0,1,transformers, ,,
THUDM/glm-4-9b,2024-06-19 23:28:41,,THUDM/glm-4-9b,,,FAILED,eq-bench,FAILED,1,transformers, ,,too many values to unpack (expected 2)
THUDM/glm-4-9b-chat,2024-06-19 23:29:01,,THUDM/glm-4-9b-chat,,,FAILED,eq-bench,FAILED,1,transformers, ,,too many values to unpack (expected 2)
NousResearch/Nous-Hermes-2-SOLAR-10.7B,2024-06-19 23:51:07,,NousResearch/Nous-Hermes-2-SOLAR-10.7B,,,49.85,eq-bench_v2_pl,169.0,1,transformers, ,,
TeeZee/Bielik-SOLAR-LIKE-10.7B-Instruct-v0.1,2024-06-20 00:00:02,,TeeZee/Bielik-SOLAR-LIKE-10.7B-Instruct-v0.1,,,35.63,eq-bench_v2_pl,164.0,1,transformers, ,,
upstage/SOLAR-10.7B-Instruct-v1.0,2024-06-20 00:19:48,,upstage/SOLAR-10.7B-Instruct-v1.0,,,57.35,eq-bench_v2_pl,162.0,1,transformers, ,,
upstage/SOLAR-10.7B-v1.0,2024-06-20 01:12:51,,upstage/SOLAR-10.7B-v1.0,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,1.0 questions were parseable (min is 83%)
tiiuae/falcon-11B,2024-06-20 01:23:54,,tiiuae/falcon-11B,,,42.41,eq-bench_v2_pl,171.0,1,transformers, ,,
lmsys/vicuna-13b-v1.5,2024-06-20 01:43:40,,lmsys/vicuna-13b-v1.5,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,84.0 questions were parseable (min is 83%)
OPI-PG/Qra-13b,2024-06-20 02:07:48,,OPI-PG/Qra-13b,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,0.0 questions were parseable (min is 83%)
teknium/OpenHermes-13B,2024-06-20 02:32:04,,teknium/OpenHermes-13B,,,36.85,eq-bench_v2_pl,162.0,1,transformers, ,,
Voicelab/trurl-2-13b-academic,2024-06-20 02:38:04,,Voicelab/trurl-2-13b-academic,,,25.92,eq-bench_v2_pl,162.0,1,transformers, ,,
microsoft/Phi-3-medium-4k-instruct,2024-06-20 02:46:38,,microsoft/Phi-3-medium-4k-instruct,,,57.07,eq-bench_v2_pl,169.0,1,transformers, ,,
Qwen/Qwen1.5-14B-Chat,2024-06-20 02:52:13,,Qwen/Qwen1.5-14B-Chat,,,51.26,eq-bench_v2_pl,160.0,1,transformers, ,,
internlm/internlm2-20b,2024-06-20 09:04:33,,internlm/internlm2-20b,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,4.0 questions were parseable (min is 83%)
internlm/internlm2-chat-20b,2024-06-20 09:47:11,,internlm/internlm2-chat-20b,,,36.52,eq-bench_v2_pl,170.0,1,transformers, ,,
Qwen/Qwen1.5-32B,2024-06-20 13:25:12,,Qwen/Qwen1.5-32B,,,54.35,eq-bench_v2_pl,170.0,1,transformers, ,,
Qwen/Qwen1.5-32B-Chat,2024-06-20 13:34:52,,Qwen/Qwen1.5-32B-Chat,,,60.69,eq-bench_v2_pl,168.0,1,transformers, ,,
01-ai/Yi-1.5-34B-Chat,2024-06-20 13:51:30,,01-ai/Yi-1.5-34B-Chat,,,46.32,eq-bench_v2_pl,171.0,1,transformers, ,,
CohereForAI/aya-23-35B,2024-06-20 14:03:07,,CohereForAI/aya-23-35B,,,58.41,eq-bench_v2_pl,171.0,1,transformers, ,,
CohereForAI/c4ai-command-r-v01,2024-06-20 14:14:54,,CohereForAI/c4ai-command-r-v01,,,56.43,eq-bench_v2_pl,171.0,1,transformers, ,,
mistralai/Mixtral-8x7B-Instruct-v0.1,2024-06-20 14:35:28,,mistralai/Mixtral-8x7B-Instruct-v0.1,,,58.64,eq-bench_v2_pl,168.0,1,transformers, ,,
mistralai/Mixtral-8x7B-v0.1,2024-06-20 15:30:24,,mistralai/Mixtral-8x7B-v0.1,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,10.0 questions were parseable (min is 83%)
Qwen/Qwen2-57B-A14B-Instruct,2024-06-20 16:19:41,,Qwen/Qwen2-57B-A14B-Instruct,,,57.64,eq-bench_v2_pl,171.0,1,transformers, ,,
meta-llama/Meta-Llama-3-70B,2024-06-20 16:59:30,,meta-llama/Meta-Llama-3-70B,,,46.1,eq-bench_v2_pl,145.0,1,transformers, ,,
meta-llama/Meta-Llama-3-70B-Instruct,2024-06-20 17:15:58,,meta-llama/Meta-Llama-3-70B-Instruct,,,71.21,eq-bench_v2_pl,171.0,1,transformers, ,,
Qwen/Qwen1.5-72B,2024-06-20 17:50:17,,Qwen/Qwen1.5-72B,,,53.96,eq-bench_v2_pl,163.0,1,transformers, ,,
Qwen/Qwen1.5-72B-Chat,2024-06-20 18:06:58,,Qwen/Qwen1.5-72B-Chat,,,68.03,eq-bench_v2_pl,171.0,1,transformers, ,,
Qwen/Qwen2-72B,2024-06-20 18:36:22,,Qwen/Qwen2-72B,,,69.75,eq-bench_v2_pl,169.0,1,transformers, ,,
Qwen/Qwen2-72B-Instruct,2024-06-20 18:55:02,,Qwen/Qwen2-72B-Instruct,,,72.07,eq-bench_v2_pl,169.0,1,transformers, ,,
mistralai/Mixtral-8x22B-v0.1,2024-06-21 20:20:37,,mistralai/Mixtral-8x22B-v0.1,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,34.0 questions were parseable (min is 83%)
mistralai/Mixtral-8x22B-Instruct-v0.1,2024-06-26 23:40:01,,mistralai/Mixtral-8x22B-Instruct-v0.1,,,67.63,eq-bench_v2_pl,171.0,1,transformers, ,,
mistralai/Mixtral-8x22B-v0.1,2024-06-27 01:17:13,,mistralai/Mixtral-8x22B-v0.1,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,50.0 questions were parseable (min is 83%)
alpindale/WizardLM-2-8x22B,2024-06-27 01:50:42,,alpindale/WizardLM-2-8x22B,,,69.56,eq-bench_v2_pl,171.0,1,transformers, ,,
Bielik_v2.2b,2024-08-24 09:54:33,,speakleash/Bielik-11B-v2.2-Instruct,,,69.05,eq-bench_v2_pl,171.0,1,transformers, ,,
Bielik_v2.1,2024-08-24 10:07:46,,speakleash/Bielik-11B-v2.1-Instruct,,,66.27,eq-bench_v2_pl,155.0,1,transformers, ,,
meta-llama/Meta-Llama-3.1-70B-Instruct,2024-08-24 21:24:39,,meta-llama/Meta-Llama-3.1-70B-Instruct,,,FAILED,eq-bench,FAILED,1,transformers, ,,`rope_scaling` must be a dictionary with two fields, `type` and `factor`, got {'factor': 8.0, 'low_freq_factor': 1.0, 'high_freq_factor': 4.0, 'original_max_position_embeddings': 8192, 'rope_type': 'llama3'}
mistralai/Mistral-Large-Instruct-2407,2024-08-24 21:51:53,,mistralai/Mistral-Large-Instruct-2407,,,78.07,eq-bench_v2_pl,171.0,1,transformers, ,,
meta-llama/Meta-Llama-3.1-70B-Instruct,2024-08-24 22:23:40,,meta-llama/Meta-Llama-3.1-70B-Instruct,,,72.53,eq-bench_v2_pl,171.0,1,transformers, ,,
meta-llama/Meta-Llama-3.1-405B-Instruct-FP8,2024-08-25 20:59:04,openai_api,meta-llama/Meta-Llama-3.1-405B-Instruct-FP8,,,77.23,eq-bench_v2_pl,171.0,1,openai,,,
gpt-3.5-turbo,2024-08-25 21:14:25,openai_api,gpt-3.5-turbo,,,57.7,eq-bench_v2_pl,171.0,1,openai,,,
gpt-4o-mini-2024-07-18,2024-08-25 21:17:34,openai_api,gpt-4o-mini-2024-07-18,,,71.15,eq-bench_v2_pl,171.0,1,openai,,,
gpt-4o-2024-08-06,2024-08-25 21:24:35,openai_api,gpt-4o-2024-08-06,,,75.15,eq-bench_v2_pl,171.0,1,openai,,,
gpt-4-turbo-2024-04-09,2024-08-25 21:31:42,openai_api,gpt-4-turbo-2024-04-09,,,77.77,eq-bench_v2_pl,164.0,1,openai,,,
Bielik_v2.3,2024-09-14 10:40:57,,speakleash/Bielik-11B-v2.3-Instruct,,,70.86,eq-bench_v2_pl,171.0,1,transformers, ,,