Spaces:

andrewrreed
/

closed-vs-open-arena-elo

Running on CPU Upgrade

App Files Files Community

andrewrreed HF staff committed on May 6

Commit

4df8d2a

•

1 Parent(s): a97c4f0

add logic to update with new models from elo data

Browse files

Files changed (3) hide show

app.py +20 -17
release_date_mapping.json +5 -0
utils.py +73 -1

app.py CHANGED Viewed

@@ -6,10 +6,11 @@ import plotly.express as px
 from utils import (
     KEY_TO_CATEGORY_NAME,
-    PROPRIETARY_LICENSES,
     CAT_NAME_TO_EXPLANATION,
     download_latest_data_from_space,
     get_constants,
 )
 ###################
@@ -36,10 +37,25 @@ latest_leaderboard_file_local = download_latest_data_from_space(
 )
 leaderboard_df = pd.read_csv(latest_leaderboard_file_local)
 ###################
 ### Prepare Data
 ###################
 # merge leaderboard data with ELO data
 merged_dfs = {}
 for k, v in arena_dfs.items():
@@ -50,33 +66,18 @@ for k, v in arena_dfs.items():
     )
 # add release dates into the merged data
-release_date_mapping = pd.read_json("release_date_mapping.json", orient="records")
 for k, v in merged_dfs.items():
     merged_dfs[k] = pd.merge(
         merged_dfs[k], release_date_mapping[["key", "Release Date"]], on="key"
     )
 # format dataframes
-def format_data(df):
-    df["License"] = df["License"].apply(
-        lambda x: "Proprietary LLM" if x in PROPRIETARY_LICENSES else "Open LLM"
-    )
-    df["Release Date"] = pd.to_datetime(df["Release Date"])
-    df["Month-Year"] = df["Release Date"].dt.to_period("M")
-    df["rating"] = df["rating"].round()
-    return df.reset_index(drop=True)
 merged_dfs = {k: format_data(v) for k, v in merged_dfs.items()}
 # get constants
 min_elo_score, max_elo_score, upper_models_per_month = get_constants(merged_dfs)
 date_updated = elo_results["full"]["last_updated_datetime"].split(" ")[0]
 ###################
 ### Plot Data
 ###################
@@ -100,6 +101,7 @@ def build_plot(min_score, max_models_per_month, toggle_annotations, set_selector
         .reset_index(drop=True)
     )
     fig = px.scatter(
         filtered_df,
         x="Release Date",
@@ -153,7 +155,8 @@ with gr.Blocks(
             <h1 style="font-weight: 900; margin-top: 5px;">🔬 Progress Tracker: Open vs. Proprietary LLMs
             </h1>
             <p style="text-align: left; margin-top: 10px; margin-bottom: 10px; line-height: 20px;">
-            This app visualizes the progress of proprietary and open-source LLMs in the LMSYS Arena ELO leaderboard. The idea is inspired by <a href="https://www.linkedin.com/posts/maxime-labonne_arena-elo-graph-updated-with-new-models-activity-7187062633735368705-u2jB?utm_source=share&utm_medium=member_desktop">this great work</a> from <a href="https://huggingface.co/mlabonne/">Maxime Labonne</a>.
             </p>
         </div>
         """

 from utils import (
     KEY_TO_CATEGORY_NAME,
     CAT_NAME_TO_EXPLANATION,
     download_latest_data_from_space,
     get_constants,
+    update_release_date_mapping,
+    format_data,
 )
 ###################
 )
 leaderboard_df = pd.read_csv(latest_leaderboard_file_local)
+# load release date mapping data
+release_date_mapping = pd.read_json("release_date_mapping.json", orient="records")
 ###################
 ### Prepare Data
 ###################
+# update release date mapping with new models
+# check for new models in ELO data
+new_model_keys_to_add = [
+    model
+    for model in arena_dfs["Overall"].index.to_list()
+    if model not in release_date_mapping["key"].to_list()
+]
+if new_model_keys_to_add:
+    release_date_mapping = update_release_date_mapping(
+        new_model_keys_to_add, leaderboard_df, release_date_mapping
+    )
 # merge leaderboard data with ELO data
 merged_dfs = {}
 for k, v in arena_dfs.items():
     )
 # add release dates into the merged data
 for k, v in merged_dfs.items():
     merged_dfs[k] = pd.merge(
         merged_dfs[k], release_date_mapping[["key", "Release Date"]], on="key"
     )
 # format dataframes
 merged_dfs = {k: format_data(v) for k, v in merged_dfs.items()}
 # get constants
 min_elo_score, max_elo_score, upper_models_per_month = get_constants(merged_dfs)
 date_updated = elo_results["full"]["last_updated_datetime"].split(" ")[0]
 ###################
 ### Plot Data
 ###################
         .reset_index(drop=True)
     )
+    # construct plot
     fig = px.scatter(
         filtered_df,
         x="Release Date",
             <h1 style="font-weight: 900; margin-top: 5px;">🔬 Progress Tracker: Open vs. Proprietary LLMs
             </h1>
             <p style="text-align: left; margin-top: 10px; margin-bottom: 10px; line-height: 20px;">
+            This app visualizes the progress of proprietary and open-source LLMs in the LMSYS Arena ELO leaderboard over time.
+            The idea is inspired by <a href="https://www.linkedin.com/posts/maxime-labonne_arena-elo-graph-updated-with-new-models-activity-7187062633735368705-u2jB?utm_source=share&utm_medium=member_desktop">this great work</a> from <a href="https://huggingface.co/mlabonne/">Maxime Labonne</a>.
             </p>
         </div>
         """

release_date_mapping.json CHANGED Viewed

@@ -453,5 +453,10 @@
         "key": "llama-13b",
         "Model": "LLaMA-13B",
         "Release Date": "2023-02-27"
     }
 ]

         "key": "llama-13b",
         "Model": "LLaMA-13B",
         "Release Date": "2023-02-27"
+    },
+    {
+        "key": "snowflake-arctic-instruct",
+        "Model": "Snowflake Arctic Instruct",
+        "Release Date": "2024-04-24"
     }
 ]

utils.py CHANGED Viewed

@@ -1,5 +1,9 @@
-from typing import Literal
 from huggingface_hub import HfFileSystem, hf_hub_download
 KEY_TO_CATEGORY_NAME = {
@@ -95,3 +99,71 @@ def get_constants(dfs):
             upper_models_per_month, value["upper_models_per_month"]
         )
     return min_elo_score, max_elo_score, upper_models_per_month

+import json
+from datetime import datetime
+from typing import Literal, List
+import pandas as pd
 from huggingface_hub import HfFileSystem, hf_hub_download
 KEY_TO_CATEGORY_NAME = {
             upper_models_per_month, value["upper_models_per_month"]
         )
     return min_elo_score, max_elo_score, upper_models_per_month
+def update_release_date_mapping(
+    new_model_keys_to_add: List[str],
+    leaderboard_df: pd.DataFrame,
+    release_date_mapping: pd.DataFrame,
+) -> pd.DataFrame:
+    """
+    Append newly seen model keys to release_date_mapping.json and reload it.
+    Each new key is looked up in the leaderboard to obtain its "Model" name and
+    is recorded with today's date as its "Release Date" (the day the key was
+    first seen, which is not necessarily the model's actual release date).
+    The JSON file is read once and written once, however many keys are added.
+    Keys with no matching leaderboard row are skipped and reported rather than
+    raising an IndexError.
+    Args:
+        new_model_keys_to_add (List[str]): A list of new model keys to add to the release date mapping.
+        leaderboard_df (pd.DataFrame): The leaderboard DataFrame; its "key" and "Model" columns are read.
+        release_date_mapping (pd.DataFrame): The current release date mapping DataFrame.
+    Returns:
+        pd.DataFrame: The mapping reloaded from release_date_mapping.json, or the
+        DataFrame passed in, unchanged, when nothing was added.
+    """
+    # nothing to do: hand back the mapping exactly as it was given
+    if not new_model_keys_to_add:
+        return release_date_mapping
+    mapping_path = "release_date_mapping.json"
+    today = datetime.today().strftime("%Y-%m-%d")
+    new_entries = []
+    for key in new_model_keys_to_add:
+        model_names = leaderboard_df.loc[leaderboard_df["key"] == key, "Model"]
+        if model_names.empty:
+            # no leaderboard row to take a model name from; skip this key
+            # instead of failing on an out-of-range index
+            print(f"Skipped {key}: not found in leaderboard data")
+            continue
+        new_entries.append(
+            {"key": key, "Model": model_names.values[0], "Release Date": today}
+        )
+    # avoid rewriting the file when every candidate key was skipped
+    if not new_entries:
+        return release_date_mapping
+    with open(mapping_path, "r") as file:
+        data = json.load(file)
+    data.extend(new_entries)
+    with open(mapping_path, "w") as file:
+        json.dump(data, file, indent=4)
+    for entry in new_entries:
+        print(f"Added {entry['key']} to release_date_mapping.json")
+    # reload the release date mapping so the caller sees the file's new contents
+    return pd.read_json(mapping_path, orient="records")
+def format_data(df):
+    """
+    Prepare a merged DataFrame for plotting.
+    The columns of the DataFrame passed in are overwritten in place:
+    - 'License' becomes 'Proprietary LLM' when the value is in PROPRIETARY_LICENSES, otherwise 'Open LLM'.
+    - 'Release Date' is converted to datetime.
+    - 'Month-Year' is added as the monthly period of 'Release Date'.
+    - 'rating' is rounded to the nearest integer value.
+    Args:
+        df (pandas.DataFrame): The DataFrame to be formatted; must contain the
+            'License', 'Release Date' and 'rating' columns.
+    Returns:
+        pandas.DataFrame: The formatted DataFrame with its index reset.
+    """
+    def _license_group(license_name):
+        # two-way split: anything not listed as proprietary counts as open
+        if license_name in PROPRIETARY_LICENSES:
+            return "Proprietary LLM"
+        return "Open LLM"
+    df["License"] = df["License"].apply(_license_group)
+    release_dates = pd.to_datetime(df["Release Date"])
+    df["Release Date"] = release_dates
+    df["Month-Year"] = release_dates.dt.to_period("M")
+    df["rating"] = df["rating"].round()
+    return df.reset_index(drop=True)