{ "cells": [ { "cell_type": "markdown", "id": "929c711d-3953-4fa4-82a7-5bca07e344cd", "metadata": {}, "source": [ "# Load Libraries" ] }, { "cell_type": "code", "execution_count": 43, "id": "2b17a5c1-b120-4a52-b897-f9668d19b91f", "metadata": {}, "outputs": [], "source": [ "import datasets\n", "import huggingface_hub\n", "import matplotlib.patches as mpatches\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "from tqdm.notebook import tqdm" ] }, { "cell_type": "code", "execution_count": 44, "id": "d90734cd-4208-4b06-9d67-b30e021e8cad", "metadata": {}, "outputs": [], "source": [ "pd.set_option(\"display.max_columns\", None)" ] }, { "cell_type": "markdown", "id": "77bb9548-7e8f-4691-bb6e-63de41318464", "metadata": {}, "source": [ "## Load and Preprocess V1" ] }, { "cell_type": "code", "execution_count": 45, "id": "3220654b-2c87-422e-a336-047e94730cc9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(7260, 26)" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the v1 leaderboard contents (train split) with `datasets` and convert to pandas\n", "ds = datasets.load_dataset(\"open-llm-leaderboard-old/contents\", split=\"train\")\n", "data_v1 = ds.to_pandas()\n", "data_v1.shape" ] }, { "cell_type": "code", "execution_count": 46, "id": "6da7bed7-1c7e-4461-8cf0-372fecfe0393", "metadata": {}, "outputs": [], "source": [ "# Drop models whose eval_name marks them as contaminated (literal substring match);\n", "# na=False keeps rows with a missing eval_name instead of raising on a non-boolean mask\n", "data_v1 = data_v1[~data_v1[\"eval_name\"].str.contains(\"contaminated\", regex=False, na=False)]" ] }, { "cell_type": "code", "execution_count": 47, "id": "5fe0a4fb-e363-499c-a886-f2dbe2ad6a4d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(7258, 26)" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_v1.shape" ] }, { "cell_type": "code", "execution_count": 48, "id": "60a10bec-5bfb-4ab9-9280-2ca323574827", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | eval_name | \n", "Precision | \n", "Type | \n", "T | \n", "Weight type | \n", "Architecture | \n", "Model | \n", "fullname | \n", "Model sha | \n", "Average ⬆️ | \n", "Hub License | \n", "Hub ❤️ | \n", "#Params (B) | \n", "Available on the hub | \n", "Merged | \n", "MoE | \n", "Flagged | \n", "date | \n", "Chat Template | \n", "ARC | \n", "HellaSwag | \n", "MMLU | \n", "TruthfulQA | \n", "Winogrande | \n", "GSM8K | \n", "Maintainers Choice | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0-hero_Matter-0.1-7B_bfloat16 | \n", "bfloat16 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "💬 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "0-hero/Matter-0.1-7B | \n", "035c8193ce71be90be7d90098669afb9164ec6cb | \n", "63.391248 | \n", "apache-2.0 | \n", "0 | \n", "7 | \n", "True | \n", "True | \n", "True | \n", "True | \n", "2024-03-21T06:05:50Z | \n", "False | \n", "61.774744 | \n", "82.135033 | \n", "62.423731 | \n", "42.439513 | \n", "77.821626 | \n", "53.752843 | \n", "False | \n", "
1 | \n", "0-hero_Matter-0.1-7B-DPO-preview_bfloat16 | \n", "bfloat16 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "💬 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "0-hero/Matter-0.1-7B-DPO-preview | \n", "78040e4754051df49dd907cf1fd46a6b8a6cc30f | \n", "64.870290 | \n", "apache-2.0 | \n", "0 | \n", "7 | \n", "True | \n", "True | \n", "True | \n", "True | \n", "2024-03-23T04:13:58Z | \n", "False | \n", "62.713311 | \n", "82.991436 | \n", "62.700299 | \n", "45.790101 | \n", "78.847672 | \n", "56.178923 | \n", "False | \n", "
2 | \n", "0-hero_Matter-0.1-7B-boost_bfloat16 | \n", "bfloat16 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "💬 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "0-hero/Matter-0.1-7B-boost | \n", "ba56089eed1211f02e8d0ff47901e77b0cd48f83 | \n", "63.223517 | \n", "apache-2.0 | \n", "0 | \n", "7 | \n", "True | \n", "True | \n", "True | \n", "True | \n", "2024-03-21T06:05:38Z | \n", "False | \n", "62.627986 | \n", "81.507668 | \n", "61.967618 | \n", "54.702404 | \n", "75.927388 | \n", "42.608036 | \n", "False | \n", "
3 | \n", "0-hero_Matter-0.1-7B-boost-DPO_bfloat16 | \n", "bfloat16 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "💬 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "0-hero/Matter-0.1-7B-boost-DPO | \n", "5bee9978fcf2188f1070b67f6d94be344fdd99c0 | \n", "65.985858 | \n", "\n", " | 0 | \n", "7 | \n", "False | \n", "True | \n", "True | \n", "True | \n", "2024-03-22T15:02:21Z | \n", "False | \n", "65.017065 | \n", "83.081060 | \n", "61.873805 | \n", "60.293632 | \n", "75.611681 | \n", "50.037908 | \n", "False | \n", "
4 | \n", "0-hero_Matter-0.1-7B-boost-DPO-preview_bfloat16 | \n", "bfloat16 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "💬 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "0-hero/Matter-0.1-7B-boost-DPO-preview | \n", "d390fb35a781129efd26d53f7ecdb513c0c3da27 | \n", "65.767435 | \n", "apache-2.0 | \n", "2 | \n", "7 | \n", "True | \n", "True | \n", "True | \n", "True | \n", "2024-03-22T07:40:42Z | \n", "False | \n", "64.590444 | \n", "82.871938 | \n", "62.017625 | \n", "58.859162 | \n", "75.848461 | \n", "50.416983 | \n", "False | \n", "
\n", " | eval_name | \n", "Precision | \n", "Type | \n", "T | \n", "Weight type | \n", "Architecture | \n", "Model | \n", "fullname | \n", "Model sha | \n", "Average ⬆️ | \n", "Hub License | \n", "Hub ❤️ | \n", "#Params (B) | \n", "Available on the hub | \n", "Merged | \n", "MoE | \n", "Flagged | \n", "date | \n", "Chat Template | \n", "IFEval Raw | \n", "IFEval | \n", "BBH Raw | \n", "BBH | \n", "MATH Lvl 5 Raw | \n", "MATH Lvl 5 | \n", "GPQA Raw | \n", "GPQA | \n", "MUSR Raw | \n", "MUSR | \n", "MMLU-PRO Raw | \n", "MMLU-PRO | \n", "Maintainer's Highlight | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "upstage_SOLAR-10.7B-v1.0_float16 | \n", "float16 | \n", "🟢 pretrained | \n", "🟢 | \n", "Original | \n", "LlamaForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "upstage/SOLAR-10.7B-v1.0 | \n", "a45090b8e56bdc2b8e32e46b3cd782fc0bea1fa5 | \n", "17.072003 | \n", "apache-2.0 | \n", "248 | \n", "10 | \n", "True | \n", "True | \n", "True | \n", "False | \n", "2024-06-12T12:27:42Z | \n", "False | \n", "0.242126 | \n", "24.212645 | \n", "0.509387 | \n", "29.789358 | \n", "0.021148 | \n", "2.114804 | \n", "0.281040 | \n", "4.138702 | \n", "0.437156 | \n", "13.677865 | \n", "0.340010 | \n", "26.667775 | \n", "True | \n", "
1 | \n", "upstage_SOLAR-10.7B-Instruct-v1.0_float16 | \n", "float16 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "💬 | \n", "Original | \n", "LlamaForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "upstage/SOLAR-10.7B-Instruct-v1.0 | \n", "c08c25ed66414a878fe0401a3596d536c083606c | \n", "19.961989 | \n", "cc-by-nc-4.0 | \n", "592 | \n", "10 | \n", "True | \n", "True | \n", "True | \n", "False | \n", "2024-06-12T12:06:58Z | \n", "True | \n", "0.473661 | \n", "47.366100 | \n", "0.516249 | \n", "31.872402 | \n", "0.000000 | \n", "0.000000 | \n", "0.308725 | \n", "7.829978 | \n", "0.389937 | \n", "6.942188 | \n", "0.313830 | \n", "23.758865 | \n", "True | \n", "
2 | \n", "togethercomputer_RedPajama-INCITE-Instruct-3B-... | \n", "float16 | \n", "🔶 fine-tuned on domain-specific datasets | \n", "🔶 | \n", "Original | \n", "GPTNeoXForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "togethercomputer/RedPajama-INCITE-Instruct-3B-v1 | \n", "0c66778ee09a036886741707733620b91057909a | \n", "5.877290 | \n", "apache-2.0 | \n", "91 | \n", "3 | \n", "True | \n", "True | \n", "True | \n", "False | \n", "2024-06-12T12:07:46Z | \n", "False | \n", "0.212426 | \n", "21.242636 | \n", "0.314602 | \n", "4.510786 | \n", "0.006042 | \n", "0.604230 | \n", "0.247483 | \n", "0.000000 | \n", "0.388604 | \n", "6.408854 | \n", "0.110954 | \n", "1.217125 | \n", "True | \n", "
3 | \n", "togethercomputer_RedPajama-INCITE-Chat-3B-v1_f... | \n", "float16 | \n", "🔶 fine-tuned on domain-specific datasets | \n", "🔶 | \n", "Original | \n", "GPTNeoXForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "togethercomputer/RedPajama-INCITE-Chat-3B-v1 | \n", "f0e0995eba801096ed04cb87931d96a8316871af | \n", "4.950649 | \n", "apache-2.0 | \n", "147 | \n", "3 | \n", "True | \n", "True | \n", "True | \n", "False | \n", "2024-06-13T17:58:59Z | \n", "False | \n", "0.165215 | \n", "16.521496 | \n", "0.321669 | \n", "5.164728 | \n", "0.003021 | \n", "0.302115 | \n", "0.244128 | \n", "0.000000 | \n", "0.368448 | \n", "5.089323 | \n", "0.112699 | \n", "1.411052 | \n", "True | \n", "
4 | \n", "togethercomputer_RedPajama-INCITE-Base-3B-v1_f... | \n", "float16 | \n", "🟢 pretrained | \n", "🟢 | \n", "Original | \n", "GPTNeoXForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "togethercomputer/RedPajama-INCITE-Base-3B-v1 | \n", "094fbdd0c911feb485ce55de1952ab2e75277e1e | \n", "5.645099 | \n", "apache-2.0 | \n", "90 | \n", "3 | \n", "True | \n", "True | \n", "True | \n", "False | \n", "2024-06-12T12:28:23Z | \n", "False | \n", "0.229363 | \n", "22.936254 | \n", "0.306040 | \n", "3.518608 | \n", "0.009063 | \n", "0.906344 | \n", "0.243289 | \n", "0.000000 | \n", "0.373875 | \n", "4.001042 | \n", "0.111120 | \n", "1.235594 | \n", "True | \n", "
\n", " | eval_name | \n", "Precision | \n", "Type | \n", "T | \n", "Weight type | \n", "Architecture | \n", "Model | \n", "fullname | \n", "Model sha | \n", "Average ⬆️ | \n", "Hub License | \n", "Hub ❤️ | \n", "#Params (B) | \n", "Available on the hub | \n", "Merged | \n", "MoE | \n", "Flagged | \n", "date | \n", "Chat Template | \n", "ARC | \n", "HellaSwag | \n", "MMLU | \n", "TruthfulQA | \n", "Winogrande | \n", "GSM8K | \n", "Maintainers Choice | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0-hero_Matter-0.1-7B_bfloat16 | \n", "bfloat16 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "💬 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "0-hero/Matter-0.1-7B | \n", "035c8193ce71be90be7d90098669afb9164ec6cb | \n", "63.391248 | \n", "apache-2.0 | \n", "0 | \n", "7 | \n", "True | \n", "True | \n", "True | \n", "True | \n", "2024-03-20 05:57:38+00:00 | \n", "False | \n", "61.774744 | \n", "82.135033 | \n", "62.423731 | \n", "42.439513 | \n", "77.821626 | \n", "53.752843 | \n", "False | \n", "
1 | \n", "0-hero_Matter-0.1-7B-DPO-preview_bfloat16 | \n", "bfloat16 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "💬 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "0-hero/Matter-0.1-7B-DPO-preview | \n", "78040e4754051df49dd907cf1fd46a6b8a6cc30f | \n", "64.870290 | \n", "apache-2.0 | \n", "0 | \n", "7 | \n", "True | \n", "True | \n", "True | \n", "True | \n", "2024-03-19 11:27:26+00:00 | \n", "False | \n", "62.713311 | \n", "82.991436 | \n", "62.700299 | \n", "45.790101 | \n", "78.847672 | \n", "56.178923 | \n", "False | \n", "
2 | \n", "0-hero_Matter-0.1-7B-boost_bfloat16 | \n", "bfloat16 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "💬 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "0-hero/Matter-0.1-7B-boost | \n", "ba56089eed1211f02e8d0ff47901e77b0cd48f83 | \n", "63.223517 | \n", "apache-2.0 | \n", "0 | \n", "7 | \n", "True | \n", "True | \n", "True | \n", "True | \n", "2024-03-19 11:26:56+00:00 | \n", "False | \n", "62.627986 | \n", "81.507668 | \n", "61.967618 | \n", "54.702404 | \n", "75.927388 | \n", "42.608036 | \n", "False | \n", "
3 | \n", "0-hero_Matter-0.1-7B-boost-DPO_bfloat16 | \n", "bfloat16 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "💬 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "0-hero/Matter-0.1-7B-boost-DPO | \n", "5bee9978fcf2188f1070b67f6d94be344fdd99c0 | \n", "65.985858 | \n", "\n", " | 0 | \n", "7 | \n", "False | \n", "True | \n", "True | \n", "True | \n", "2024-03-22T15:02:21Z | \n", "False | \n", "65.017065 | \n", "83.081060 | \n", "61.873805 | \n", "60.293632 | \n", "75.611681 | \n", "50.037908 | \n", "False | \n", "
4 | \n", "0-hero_Matter-0.1-7B-boost-DPO-preview_bfloat16 | \n", "bfloat16 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "💬 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "0-hero/Matter-0.1-7B-boost-DPO-preview | \n", "d390fb35a781129efd26d53f7ecdb513c0c3da27 | \n", "65.767435 | \n", "apache-2.0 | \n", "2 | \n", "7 | \n", "True | \n", "True | \n", "True | \n", "True | \n", "2024-03-21 13:04:58+00:00 | \n", "False | \n", "64.590444 | \n", "82.871938 | \n", "62.017625 | \n", "58.859162 | \n", "75.848461 | \n", "50.416983 | \n", "False | \n", "
\n", " | Type | \n", "IFEval | \n", "MATH Lvl 5 | \n", "Average ⬆️ | \n", "fullname | \n", "
---|---|---|---|---|---|
0 | \n", "🟢 pretrained | \n", "24.212645 | \n", "2.114804 | \n", "17.072003 | \n", "upstage/SOLAR-10.7B-v1.0 | \n", "
1 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "47.366100 | \n", "0.000000 | \n", "19.961989 | \n", "upstage/SOLAR-10.7B-Instruct-v1.0 | \n", "
2 | \n", "🔶 fine-tuned on domain-specific datasets | \n", "21.242636 | \n", "0.604230 | \n", "5.877290 | \n", "togethercomputer/RedPajama-INCITE-Instruct-3B-v1 | \n", "
3 | \n", "🔶 fine-tuned on domain-specific datasets | \n", "16.521496 | \n", "0.302115 | \n", "4.950649 | \n", "togethercomputer/RedPajama-INCITE-Chat-3B-v1 | \n", "
4 | \n", "🟢 pretrained | \n", "22.936254 | \n", "0.906344 | \n", "5.645099 | \n", "togethercomputer/RedPajama-INCITE-Base-3B-v1 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
201 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "48.023023 | \n", "12.537764 | \n", "22.405532 | \n", "01-ai/Yi-1.5-6B-Chat | \n", "
202 | \n", "🟢 pretrained | \n", "26.106065 | \n", "5.664653 | \n", "16.778059 | \n", "01-ai/Yi-1.5-6B | \n", "
203 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "60.667584 | \n", "23.338369 | \n", "33.076818 | \n", "01-ai/Yi-1.5-34B-Chat | \n", "
204 | \n", "🟢 pretrained | \n", "31.186917 | \n", "13.444109 | \n", "26.787600 | \n", "01-ai/Yi-1.5-34B-32K | \n", "
205 | \n", "🟢 pretrained | \n", "28.411725 | \n", "14.048338 | \n", "25.812197 | \n", "01-ai/Yi-1.5-34B | \n", "
204 rows × 5 columns
\n", "\n", " | eval_name | \n", "Precision | \n", "Type | \n", "T | \n", "Weight type | \n", "Architecture | \n", "Model | \n", "fullname | \n", "Model sha | \n", "Average ⬆️ | \n", "Hub License | \n", "Hub ❤️ | \n", "#Params (B) | \n", "Available on the hub | \n", "Merged | \n", "MoE | \n", "Flagged | \n", "date | \n", "Chat Template | \n", "ARC | \n", "HellaSwag | \n", "MMLU | \n", "TruthfulQA | \n", "Winogrande | \n", "GSM8K | \n", "Maintainers Choice | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0-hero_Matter-0.1-7B_bfloat16 | \n", "bfloat16 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "💬 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "0-hero/Matter-0.1-7B | \n", "035c8193ce71be90be7d90098669afb9164ec6cb | \n", "63.391248 | \n", "apache-2.0 | \n", "0 | \n", "7 | \n", "True | \n", "True | \n", "True | \n", "True | \n", "2024-03-20 05:57:38+00:00 | \n", "False | \n", "61.774744 | \n", "82.135033 | \n", "62.423731 | \n", "42.439513 | \n", "77.821626 | \n", "53.752843 | \n", "False | \n", "
1 | \n", "0-hero_Matter-0.1-7B-DPO-preview_bfloat16 | \n", "bfloat16 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "💬 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "0-hero/Matter-0.1-7B-DPO-preview | \n", "78040e4754051df49dd907cf1fd46a6b8a6cc30f | \n", "64.870290 | \n", "apache-2.0 | \n", "0 | \n", "7 | \n", "True | \n", "True | \n", "True | \n", "True | \n", "2024-03-19 11:27:26+00:00 | \n", "False | \n", "62.713311 | \n", "82.991436 | \n", "62.700299 | \n", "45.790101 | \n", "78.847672 | \n", "56.178923 | \n", "False | \n", "
2 | \n", "0-hero_Matter-0.1-7B-boost_bfloat16 | \n", "bfloat16 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "💬 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "0-hero/Matter-0.1-7B-boost | \n", "ba56089eed1211f02e8d0ff47901e77b0cd48f83 | \n", "63.223517 | \n", "apache-2.0 | \n", "0 | \n", "7 | \n", "True | \n", "True | \n", "True | \n", "True | \n", "2024-03-19 11:26:56+00:00 | \n", "False | \n", "62.627986 | \n", "81.507668 | \n", "61.967618 | \n", "54.702404 | \n", "75.927388 | \n", "42.608036 | \n", "False | \n", "
3 | \n", "0-hero_Matter-0.1-7B-boost-DPO_bfloat16 | \n", "bfloat16 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "💬 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "0-hero/Matter-0.1-7B-boost-DPO | \n", "5bee9978fcf2188f1070b67f6d94be344fdd99c0 | \n", "65.985858 | \n", "\n", " | 0 | \n", "7 | \n", "False | \n", "True | \n", "True | \n", "True | \n", "2024-03-22T15:02:21Z | \n", "False | \n", "65.017065 | \n", "83.081060 | \n", "61.873805 | \n", "60.293632 | \n", "75.611681 | \n", "50.037908 | \n", "False | \n", "
4 | \n", "0-hero_Matter-0.1-7B-boost-DPO-preview_bfloat16 | \n", "bfloat16 | \n", "💬 chat models (RLHF, DPO, IFT, ...) | \n", "💬 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "0-hero/Matter-0.1-7B-boost-DPO-preview | \n", "d390fb35a781129efd26d53f7ecdb513c0c3da27 | \n", "65.767435 | \n", "apache-2.0 | \n", "2 | \n", "7 | \n", "True | \n", "True | \n", "True | \n", "True | \n", "2024-03-21 13:04:58+00:00 | \n", "False | \n", "64.590444 | \n", "82.871938 | \n", "62.017625 | \n", "58.859162 | \n", "75.848461 | \n", "50.416983 | \n", "False | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
7255 | \n", "zorobin_mistral-class-shishya-7b-ep3_bfloat16 | \n", "bfloat16 | \n", "🔶 fine-tuned on domain-specific datasets | \n", "🔶 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "zorobin/mistral-class-shishya-7b-ep3 | \n", "e85b73ce67deaa5b40633c5ce2545b23fa3ff3a0 | \n", "44.276427 | \n", "llama2 | \n", "0 | \n", "7 | \n", "True | \n", "True | \n", "True | \n", "True | \n", "2024-01-28T04:36:16Z | \n", "False | \n", "46.587031 | \n", "76.618204 | \n", "39.065575 | \n", "33.537715 | \n", "69.850039 | \n", "0.000000 | \n", "False | \n", "
7256 | \n", "zorobin_mistral-class-shishya-all-hal-7b-ep3_b... | \n", "bfloat16 | \n", "🔶 fine-tuned on domain-specific datasets | \n", "🔶 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "zorobin/mistral-class-shishya-all-hal-7b-ep3 | \n", "8f15bc3f0d0235fdb67a8dfb6be36a1ac9c1b8b8 | \n", "44.802767 | \n", "llama2 | \n", "0 | \n", "7 | \n", "True | \n", "True | \n", "True | \n", "True | \n", "2024-01-28T04:36:49Z | \n", "False | \n", "46.587031 | \n", "78.868751 | \n", "34.450351 | \n", "35.982292 | \n", "72.928177 | \n", "0.000000 | \n", "False | \n", "
7257 | \n", "zyh3826_20231206094523-pretrain-Llama-2-13b-hf... | \n", "bfloat16 | \n", "🔶 fine-tuned on domain-specific datasets | \n", "🔶 | \n", "Original | \n", "LlamaForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "zyh3826/20231206094523-pretrain-Llama-2-13b-hf... | \n", "28b3ae089b5610053f2294d24667fe248405f031 | \n", "35.580577 | \n", "llama2 | \n", "0 | \n", "13 | \n", "True | \n", "True | \n", "True | \n", "True | \n", "2023-12-14T02:54:25Z | \n", "False | \n", "31.058020 | \n", "52.031468 | \n", "24.434487 | \n", "44.712448 | \n", "61.247040 | \n", "0.000000 | \n", "False | \n", "
7258 | \n", "zyh3826_GML-Mistral-merged-v1_bfloat16 | \n", "bfloat16 | \n", "🔶 fine-tuned on domain-specific datasets | \n", "🔶 | \n", "Original | \n", "MistralForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "zyh3826/GML-Mistral-merged-v1 | \n", "17a3d5eb5dc23b8a7c29d33cfcd07140a083aa1f | \n", "73.295247 | \n", "apache-2.0 | \n", "3 | \n", "8 | \n", "True | \n", "False | \n", "True | \n", "True | \n", "2023-12-23T11:32:47Z | \n", "False | \n", "71.245734 | \n", "87.880900 | \n", "65.417680 | \n", "69.275013 | \n", "80.978690 | \n", "64.973465 | \n", "False | \n", "
7259 | \n", "zyh3826_llama2-13b-ft-openllm-leaderboard-v1_f... | \n", "float16 | \n", "\n", " | ? | \n", "Original | \n", "LlamaForCausalLM | \n", "<a target=\"_blank\" href=\"https://huggingface.c... | \n", "zyh3826/llama2-13b-ft-openllm-leaderboard-v1 | \n", "70404059013c74b0641ed69d293b3d1ad708cd1e | \n", "53.858973 | \n", "llama2 | \n", "1 | \n", "13 | \n", "True | \n", "True | \n", "True | \n", "True | \n", "2023-12-07T03:44:45Z | \n", "False | \n", "59.641638 | \n", "83.140809 | \n", "60.934970 | \n", "40.723683 | \n", "77.348066 | \n", "1.364670 | \n", "False | \n", "
7258 rows × 26 columns
\n", "\n", " | fullname | \n", "MMLU | \n", "GSM8K | \n", "MMLU-PRO | \n", "MATH Lvl 5 | \n", "GPQA | \n", "
---|---|---|---|---|---|---|
0 | \n", "01-ai/Yi-1.5-34B | \n", "77.995719 | \n", "73.237301 | \n", "40.732122 | \n", "14.048338 | \n", "15.436242 | \n", "
1 | \n", "01-ai/Yi-1.5-34B-32K | \n", "78.153032 | \n", "0.000000 | \n", "41.212323 | \n", "13.444109 | \n", "15.100671 | \n", "
2 | \n", "01-ai/Yi-1.5-34B-Chat | \n", "77.082840 | \n", "71.645186 | \n", "39.116061 | \n", "23.338369 | \n", "15.324385 | \n", "
3 | \n", "01-ai/Yi-1.5-6B | \n", "64.726895 | \n", "50.341168 | \n", "23.343307 | \n", "5.664653 | \n", "8.277405 | \n", "
4 | \n", "01-ai/Yi-1.5-6B | \n", "65.002720 | \n", "49.810462 | \n", "23.343307 | \n", "5.664653 | \n", "8.277405 | \n", "
\n", " | fullname | \n", "MMLU | \n", "GSM8K | \n", "MMLU-PRO | \n", "MATH Lvl 5 | \n", "GPQA | \n", "
---|---|---|---|---|---|---|
27 | \n", "EleutherAI/gpt-j-6b | \n", "26.783999 | \n", "2.956785 | \n", "2.676197 | \n", "1.208459 | \n", "0.0 | \n", "
30 | \n", "EleutherAI/gpt-neox-20b | \n", "25.002595 | \n", "5.458681 | \n", "1.725030 | \n", "0.604230 | \n", "0.0 | \n", "
31 | \n", "EleutherAI/pythia-12b | \n", "26.756067 | \n", "1.743745 | \n", "1.207890 | \n", "0.906344 | \n", "0.0 | \n", "
40 | \n", "IDEA-CCNL/Ziya-LLaMA-13B-v1 | \n", "27.037992 | \n", "0.000000 | \n", "1.124778 | \n", "0.000000 | \n", "0.0 | \n", "
65 | \n", "PygmalionAI/pygmalion-6b | \n", "25.730836 | \n", "2.047005 | \n", "2.039007 | \n", "0.604230 | \n", "0.0 | \n", "
99 | \n", "bigcode/starcoder2-3b | \n", "38.648602 | \n", "19.636088 | \n", "7.071882 | \n", "1.435045 | \n", "0.0 | \n", "
103 | \n", "bigscience/bloom-3b | \n", "26.592509 | \n", "1.516300 | \n", "1.475694 | \n", "0.075529 | \n", "0.0 | \n", "
108 | \n", "databricks/dolly-v2-12b | \n", "25.916846 | \n", "1.213040 | \n", "1.429521 | \n", "1.435045 | \n", "0.0 | \n", "
116 | \n", "facebook/opt-1.3b | \n", "24.963046 | \n", "0.151630 | \n", "1.189421 | \n", "0.755287 | \n", "0.0 | \n", "
125 | \n", "google/recurrentgemma-2b | \n", "34.611141 | \n", "16.148597 | \n", "1.992834 | \n", "1.661631 | \n", "0.0 | \n", "
126 | \n", "google/recurrentgemma-2b | \n", "34.382304 | \n", "15.693707 | \n", "1.992834 | \n", "1.661631 | \n", "0.0 | \n", "
135 | \n", "meta-llama/Llama-2-13b-chat-hf | \n", "54.636271 | \n", "15.238817 | \n", "10.257831 | \n", "0.604230 | \n", "0.0 | \n", "
159 | \n", "mistralai/Mistral-7B-Instruct-v0.1 | \n", "55.375474 | \n", "14.253222 | \n", "15.336879 | \n", "1.510574 | \n", "0.0 | \n", "
185 | \n", "stabilityai/stablelm-2-1_6b | \n", "38.946573 | \n", "17.437453 | \n", "5.151079 | \n", "0.151057 | \n", "0.0 | \n", "
186 | \n", "stabilityai/stablelm-2-1_6b-chat | \n", "41.472625 | \n", "38.817286 | \n", "6.905659 | \n", "1.057402 | \n", "0.0 | \n", "
187 | \n", "stabilityai/stablelm-2-zephyr-1_6b | \n", "42.034783 | \n", "35.329795 | \n", "7.930703 | \n", "2.114804 | \n", "0.0 | \n", "
188 | \n", "stabilityai/stablelm-3b-4e1t | \n", "45.225738 | \n", "3.335861 | \n", "7.432033 | \n", "0.679758 | \n", "0.0 | \n", "
189 | \n", "stabilityai/stablelm-zephyr-3b | \n", "46.167947 | \n", "42.153146 | \n", "8.530954 | \n", "4.078550 | \n", "0.0 | \n", "
197 | \n", "tiiuae/falcon-7b | \n", "27.785470 | \n", "4.624716 | \n", "1.392583 | \n", "0.528701 | \n", "0.0 | \n", "
198 | \n", "tiiuae/falcon-7b-instruct | \n", "25.660047 | \n", "4.624716 | \n", "1.725030 | \n", "0.604230 | \n", "0.0 | \n", "
199 | \n", "tiiuae/falcon-7b-instruct | \n", "25.836964 | \n", "4.700531 | \n", "1.725030 | \n", "0.604230 | \n", "0.0 | \n", "
201 | \n", "togethercomputer/GPT-NeoXT-Chat-Base-20B | \n", "29.919101 | \n", "6.899166 | \n", "1.614214 | \n", "1.132931 | \n", "0.0 | \n", "
202 | \n", "togethercomputer/LLaMA-2-7B-32K | \n", "43.325707 | \n", "4.321456 | \n", "8.530954 | \n", "0.679758 | \n", "0.0 | \n", "
208 | \n", "togethercomputer/RedPajama-INCITE-Base-3B-v1 | \n", "27.027874 | \n", "1.288855 | \n", "1.235594 | \n", "0.906344 | \n", "0.0 | \n", "
209 | \n", "togethercomputer/RedPajama-INCITE-Chat-3B-v1 | \n", "26.231263 | \n", "0.530705 | \n", "1.411052 | \n", "0.302115 | \n", "0.0 | \n", "
210 | \n", "togethercomputer/RedPajama-INCITE-Instruct-3B-v1 | \n", "25.032214 | \n", "1.364670 | \n", "1.217125 | \n", "0.604230 | \n", "0.0 | \n", "
\n", " | eval_name | \n", "fullname | \n", "Precision | \n", "tasks | \n", "
---|---|---|---|---|
0 | \n", "upstage_SOLAR-10.7B-Instruct-v1.0_float16 | \n", "upstage/SOLAR-10.7B-Instruct-v1.0 | \n", "float16 | \n", "[MATH Lvl 5 Raw, MATH Lvl 5] | \n", "
1 | \n", "togethercomputer_RedPajama-INCITE-Instruct-3B-... | \n", "togethercomputer/RedPajama-INCITE-Instruct-3B-v1 | \n", "float16 | \n", "[GPQA] | \n", "
2 | \n", "togethercomputer_RedPajama-INCITE-Chat-3B-v1_f... | \n", "togethercomputer/RedPajama-INCITE-Chat-3B-v1 | \n", "float16 | \n", "[GPQA] | \n", "
3 | \n", "togethercomputer_RedPajama-INCITE-Base-3B-v1_f... | \n", "togethercomputer/RedPajama-INCITE-Base-3B-v1 | \n", "float16 | \n", "[GPQA] | \n", "
4 | \n", "togethercomputer_LLaMA-2-7B-32K_float16 | \n", "togethercomputer/LLaMA-2-7B-32K | \n", "float16 | \n", "[GPQA] | \n", "
\n", " | fullname | \n", "v1_score | \n", "v2_score | \n", "
---|---|---|---|
0 | \n", "01-ai/Yi-1.5-34B | \n", "73.504618 | \n", "25.812197 | \n", "
1 | \n", "01-ai/Yi-1.5-34B-32K | \n", "60.700977 | \n", "26.787600 | \n", "
2 | \n", "01-ai/Yi-1.5-34B-Chat | \n", "74.823763 | \n", "33.076818 | \n", "
3 | \n", "01-ai/Yi-1.5-6B | \n", "61.566520 | \n", "16.778059 | \n", "
5 | \n", "01-ai/Yi-1.5-6B-Chat | \n", "66.167303 | \n", "22.405532 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
208 | \n", "togethercomputer/RedPajama-INCITE-Base-3B-v1 | \n", "38.537852 | \n", "5.645099 | \n", "
209 | \n", "togethercomputer/RedPajama-INCITE-Chat-3B-v1 | \n", "39.527194 | \n", "4.950649 | \n", "
210 | \n", "togethercomputer/RedPajama-INCITE-Instruct-3B-v1 | \n", "39.055049 | \n", "5.877290 | \n", "
211 | \n", "upstage/SOLAR-10.7B-Instruct-v1.0 | \n", "74.200698 | \n", "19.961989 | \n", "
212 | \n", "upstage/SOLAR-10.7B-v1.0 | \n", "66.037836 | \n", "17.072003 | \n", "
168 rows × 3 columns
\n", "\n", " | eval_name | \n", "mean_score | \n", "IFEval | \n", "BBH | \n", "MATH Lvl 5 | \n", "GPQA | \n", "MUSR | \n", "MMLU-PRO | \n", "
---|---|---|---|---|---|---|---|---|
65 | \n", "meta-llama_Meta-Llama-3-70B-Instruct_bfloat16 | \n", "36.183402 | \n", "80.990771 | \n", "50.185133 | \n", "23.338369 | \n", "4.921700 | \n", "10.920573 | \n", "46.743868 | \n", "
125 | \n", "Qwen_Qwen2-72B-Instruct_bfloat16 | \n", "42.486308 | \n", "79.891687 | \n", "57.483009 | \n", "35.120846 | \n", "16.331096 | \n", "17.167969 | \n", "48.923242 | \n", "
66 | \n", "meta-llama_Meta-Llama-3-70B_bfloat16 | \n", "26.365471 | \n", "16.031906 | \n", "48.709813 | \n", "16.540785 | \n", "19.686801 | \n", "16.011198 | \n", "41.212323 | \n", "
61 | \n", "microsoft_Orca-2-13b_bfloat16 | \n", "18.136816 | \n", "31.279339 | \n", "27.308019 | \n", "0.981873 | \n", "4.026846 | \n", "25.787760 | \n", "19.437057 | \n", "