Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
•
4df8d2a
1
Parent(s):
a97c4f0
add logic to update with new models from elo data
Browse files- app.py +20 -17
- release_date_mapping.json +5 -0
- utils.py +73 -1
app.py
CHANGED
@@ -6,10 +6,11 @@ import plotly.express as px
|
|
6 |
|
7 |
from utils import (
|
8 |
KEY_TO_CATEGORY_NAME,
|
9 |
-
PROPRIETARY_LICENSES,
|
10 |
CAT_NAME_TO_EXPLANATION,
|
11 |
download_latest_data_from_space,
|
12 |
get_constants,
|
|
|
|
|
13 |
)
|
14 |
|
15 |
###################
|
@@ -36,10 +37,25 @@ latest_leaderboard_file_local = download_latest_data_from_space(
|
|
36 |
)
|
37 |
leaderboard_df = pd.read_csv(latest_leaderboard_file_local)
|
38 |
|
|
|
|
|
|
|
39 |
###################
|
40 |
### Prepare Data
|
41 |
###################
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
# merge leaderboard data with ELO data
|
44 |
merged_dfs = {}
|
45 |
for k, v in arena_dfs.items():
|
@@ -50,33 +66,18 @@ for k, v in arena_dfs.items():
|
|
50 |
)
|
51 |
|
52 |
# add release dates into the merged data
|
53 |
-
release_date_mapping = pd.read_json("release_date_mapping.json", orient="records")
|
54 |
for k, v in merged_dfs.items():
|
55 |
merged_dfs[k] = pd.merge(
|
56 |
merged_dfs[k], release_date_mapping[["key", "Release Date"]], on="key"
|
57 |
)
|
58 |
|
59 |
-
|
60 |
# format dataframes
|
61 |
-
def format_data(df):
|
62 |
-
df["License"] = df["License"].apply(
|
63 |
-
lambda x: "Proprietary LLM" if x in PROPRIETARY_LICENSES else "Open LLM"
|
64 |
-
)
|
65 |
-
df["Release Date"] = pd.to_datetime(df["Release Date"])
|
66 |
-
df["Month-Year"] = df["Release Date"].dt.to_period("M")
|
67 |
-
df["rating"] = df["rating"].round()
|
68 |
-
return df.reset_index(drop=True)
|
69 |
-
|
70 |
-
|
71 |
merged_dfs = {k: format_data(v) for k, v in merged_dfs.items()}
|
72 |
|
73 |
-
|
74 |
# get constants
|
75 |
min_elo_score, max_elo_score, upper_models_per_month = get_constants(merged_dfs)
|
76 |
-
|
77 |
date_updated = elo_results["full"]["last_updated_datetime"].split(" ")[0]
|
78 |
|
79 |
-
|
80 |
###################
|
81 |
### Plot Data
|
82 |
###################
|
@@ -100,6 +101,7 @@ def build_plot(min_score, max_models_per_month, toggle_annotations, set_selector
|
|
100 |
.reset_index(drop=True)
|
101 |
)
|
102 |
|
|
|
103 |
fig = px.scatter(
|
104 |
filtered_df,
|
105 |
x="Release Date",
|
@@ -153,7 +155,8 @@ with gr.Blocks(
|
|
153 |
<h1 style="font-weight: 900; margin-top: 5px;">🔬 Progress Tracker: Open vs. Proprietary LLMs
|
154 |
</h1>
|
155 |
<p style="text-align: left; margin-top: 10px; margin-bottom: 10px; line-height: 20px;">
|
156 |
-
This app visualizes the progress of proprietary and open-source LLMs in the LMSYS Arena ELO leaderboard
|
|
|
157 |
</p>
|
158 |
</div>
|
159 |
"""
|
|
|
6 |
|
7 |
from utils import (
|
8 |
KEY_TO_CATEGORY_NAME,
|
|
|
9 |
CAT_NAME_TO_EXPLANATION,
|
10 |
download_latest_data_from_space,
|
11 |
get_constants,
|
12 |
+
update_release_date_mapping,
|
13 |
+
format_data,
|
14 |
)
|
15 |
|
16 |
###################
|
|
|
37 |
)
|
38 |
leaderboard_df = pd.read_csv(latest_leaderboard_file_local)
|
39 |
|
40 |
+
# load release date mapping data
|
41 |
+
release_date_mapping = pd.read_json("release_date_mapping.json", orient="records")
|
42 |
+
|
43 |
###################
|
44 |
### Prepare Data
|
45 |
###################
|
46 |
|
47 |
+
# update release date mapping with new models
# check for new models in ELO data; the known keys are collected into a set
# once so each membership test is O(1) instead of rebuilding a list per model
known_model_keys = set(release_date_mapping["key"])
new_model_keys_to_add = [
    model
    for model in arena_dfs["Overall"].index.to_list()
    if model not in known_model_keys
]
# only touch the mapping (and the JSON file behind it) when something is new
if new_model_keys_to_add:
    release_date_mapping = update_release_date_mapping(
        new_model_keys_to_add, leaderboard_df, release_date_mapping
    )
|
58 |
+
|
59 |
# merge leaderboard data with ELO data
|
60 |
merged_dfs = {}
|
61 |
for k, v in arena_dfs.items():
|
|
|
66 |
)
|
67 |
|
68 |
# add release dates into the merged data
|
|
|
69 |
for k, v in merged_dfs.items():
|
70 |
merged_dfs[k] = pd.merge(
|
71 |
merged_dfs[k], release_date_mapping[["key", "Release Date"]], on="key"
|
72 |
)
|
73 |
|
|
|
74 |
# format dataframes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
merged_dfs = {k: format_data(v) for k, v in merged_dfs.items()}
|
76 |
|
|
|
77 |
# get constants
|
78 |
min_elo_score, max_elo_score, upper_models_per_month = get_constants(merged_dfs)
|
|
|
79 |
date_updated = elo_results["full"]["last_updated_datetime"].split(" ")[0]
|
80 |
|
|
|
81 |
###################
|
82 |
### Plot Data
|
83 |
###################
|
|
|
101 |
.reset_index(drop=True)
|
102 |
)
|
103 |
|
104 |
+
# construct plot
|
105 |
fig = px.scatter(
|
106 |
filtered_df,
|
107 |
x="Release Date",
|
|
|
155 |
<h1 style="font-weight: 900; margin-top: 5px;">🔬 Progress Tracker: Open vs. Proprietary LLMs
|
156 |
</h1>
|
157 |
<p style="text-align: left; margin-top: 10px; margin-bottom: 10px; line-height: 20px;">
|
158 |
+
This app visualizes the progress of proprietary and open-source LLMs in the LMSYS Arena ELO leaderboard over time.
|
159 |
+
The idea is inspired by <a href="https://www.linkedin.com/posts/maxime-labonne_arena-elo-graph-updated-with-new-models-activity-7187062633735368705-u2jB?utm_source=share&utm_medium=member_desktop">this great work</a> from <a href="https://huggingface.co/mlabonne/">Maxime Labonne</a>.
|
160 |
</p>
|
161 |
</div>
|
162 |
"""
|
release_date_mapping.json
CHANGED
@@ -453,5 +453,10 @@
|
|
453 |
"key": "llama-13b",
|
454 |
"Model": "LLaMA-13B",
|
455 |
"Release Date": "2023-02-27"
|
|
|
|
|
|
|
|
|
|
|
456 |
}
|
457 |
]
|
|
|
453 |
"key": "llama-13b",
|
454 |
"Model": "LLaMA-13B",
|
455 |
"Release Date": "2023-02-27"
|
456 |
+
},
|
457 |
+
{
|
458 |
+
"key": "snowflake-arctic-instruct",
|
459 |
+
"Model": "Snowflake Arctic Instruct",
|
460 |
+
"Release Date": "2024-04-24"
|
461 |
}
|
462 |
]
|
utils.py
CHANGED
@@ -1,5 +1,9 @@
|
|
1 |
-
|
|
|
2 |
|
|
|
|
|
|
|
3 |
from huggingface_hub import HfFileSystem, hf_hub_download
|
4 |
|
5 |
KEY_TO_CATEGORY_NAME = {
|
@@ -95,3 +99,71 @@ def get_constants(dfs):
|
|
95 |
upper_models_per_month, value["upper_models_per_month"]
|
96 |
)
|
97 |
return min_elo_score, max_elo_score, upper_models_per_month
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
from datetime import datetime
|
3 |
|
4 |
+
from typing import Literal, List
|
5 |
+
|
6 |
+
import pandas as pd
|
7 |
from huggingface_hub import HfFileSystem, hf_hub_download
|
8 |
|
9 |
KEY_TO_CATEGORY_NAME = {
|
|
|
99 |
upper_models_per_month, value["upper_models_per_month"]
|
100 |
)
|
101 |
return min_elo_score, max_elo_score, upper_models_per_month
|
102 |
+
|
103 |
+
|
104 |
+
def update_release_date_mapping(
    new_model_keys_to_add: List[str],
    leaderboard_df: pd.DataFrame,
    release_date_mapping: pd.DataFrame,
) -> pd.DataFrame:
    """
    Update the release date mapping with new model keys.

    Every new key is appended to ``release_date_mapping.json`` (resolved
    against the current working directory) with today's date as its release
    date, and the mapping is then reloaded from that file.

    Args:
        new_model_keys_to_add (List[str]): A list of new model keys to add to the release date mapping.
        leaderboard_df (pd.DataFrame): The leaderboard DataFrame containing the model information
            (its "key" and "Model" columns are read).
        release_date_mapping (pd.DataFrame): The current release date mapping DataFrame.

    Returns:
        pd.DataFrame: The updated release date mapping DataFrame. When there is
        nothing to add, the DataFrame passed in is returned unchanged and the
        file is not touched.
    """
    # nothing to add: leave both the file and the in-memory mapping as they are
    if not new_model_keys_to_add:
        return release_date_mapping

    mapping_path = "release_date_mapping.json"
    today = datetime.today().strftime("%Y-%m-%d")

    # read the file once instead of once per key
    with open(mapping_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    for key in new_model_keys_to_add:
        model_names = leaderboard_df.loc[leaderboard_df["key"] == key, "Model"].values
        data.append(
            {
                "key": key,
                # a key seen in the ELO data may have no leaderboard row yet;
                # fall back to the key itself rather than raising IndexError
                "Model": model_names[0] if len(model_names) else key,
                "Release Date": today,
            }
        )

    # write all new entries back in a single pass
    with open(mapping_path, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)

    for key in new_model_keys_to_add:
        print(f"Added {key} to release_date_mapping.json")

    # reload the release date mapping so the caller sees exactly what is on disk
    return pd.read_json(mapping_path, orient="records")
|
146 |
+
|
147 |
+
|
148 |
+
def format_data(df):
    """
    Prepare a merged leaderboard DataFrame for plotting.

    The frame passed in is modified in place, column by column:
    - 'License' is collapsed to 'Proprietary LLM' when the value is in
      PROPRIETARY_LICENSES and to 'Open LLM' otherwise.
    - 'Release Date' is parsed into datetime values.
    - 'Month-Year' is added as the monthly period of 'Release Date'.
    - 'rating' is rounded to the nearest integer value.

    Args:
        df (pandas.DataFrame): The DataFrame to be formatted.

    Returns:
        pandas.DataFrame: The formatted DataFrame with a fresh 0..n-1 index.
    """

    def _license_group(license_name):
        # two-way split: anything not listed as proprietary counts as open
        if license_name in PROPRIETARY_LICENSES:
            return "Proprietary LLM"
        return "Open LLM"

    df["License"] = df["License"].apply(_license_group)

    release_dates = pd.to_datetime(df["Release Date"])
    df["Release Date"] = release_dates
    df["Month-Year"] = release_dates.dt.to_period("M")

    rounded_rating = df["rating"].round()
    df["rating"] = rounded_rating

    formatted = df.reset_index(drop=True)
    return formatted
|