# Hugging Face Space app (Space status when captured: running on CPU Upgrade).
import pickle

import pandas as pd
import gradio as gr
from huggingface_hub import HfFileSystem, hf_hub_download
if gr.NO_RELOAD:
    # Everything under this guard lists/downloads remote files and merges
    # them. Gradio documents `if gr.NO_RELOAD:` as the place for code that
    # should not be re-evaluated when the app file is hot-reloaded.

    ###################
    ### Load Data
    ###################

    # Key looked up in the unpickled ELO results -> category display name.
    key_to_category_name = {
        "full": "Overall",
        "coding": "Coding",
        "long_user": "Longer Query",
        "english": "English",
        "chinese": "Chinese",
        "french": "French",
        "no_tie": "Exclude Ties",
        "no_short": "Exclude Short Query (< 5 tokens)",
        "no_refusal": "Exclude Refusal",
    }

    # Category display name -> longer explanation text. Not read anywhere in
    # the visible part of this file.
    cat_name_to_explanation = {
        "Overall": "Overall Questions",
        "Coding": "Coding: whether conversation contains code snippets",
        "Longer Query": "Longer Query (>= 500 tokens)",
        "English": "English Prompts",
        "Chinese": "Chinese Prompts",
        "French": "French Prompts",
        "Exclude Ties": "Exclude Ties and Bothbad",
        "Exclude Short Query (< 5 tokens)": "Exclude Short User Query (< 5 tokens)",
        "Exclude Refusal": 'Exclude model responses with refusal (e.g., "I cannot answer")',
    }

    fs = HfFileSystem()

    def extract_date(filename):
        """Return the last underscore-separated token of a file's base name.

        The directory part and everything from the first "." onward are
        dropped before splitting on "_"; e.g. the input
        "a/b/results_20240101.pkl" yields "20240101". The result is a string
        and is compared lexically by the caller, which presumably relies on a
        fixed-width stamp such as YYYYMMDD (TODO: confirm against the files
        actually present in the Space).
        """
        return filename.split("/")[-1].split(".")[0].split("_")[-1]

    def _latest_by_date(paths, pattern):
        """Return the entry of *paths* with the greatest extract_date() value.

        Args:
            paths: Paths returned by ``fs.glob(pattern)``.
            pattern: The glob pattern, used only in the error message.

        Raises:
            FileNotFoundError: If *paths* is empty, so a missing data file is
                reported by pattern rather than as a bare IndexError.
        """
        if not paths:
            raise FileNotFoundError(f"no files matched {pattern!r}")
        return max(paths, key=extract_date)

    # gather ELO data
    ELO_DATA_FILES = "spaces/lmsys/chatbot-arena-leaderboard/*.pkl"
    elo_files = fs.glob(ELO_DATA_FILES)
    latest_elo_file = _latest_by_date(elo_files, ELO_DATA_FILES)
    latest_elo_file_local = hf_hub_download(
        repo_id="lmsys/chatbot-arena-leaderboard",
        # hf_hub_download is given the bare file name, not the globbed path.
        filename=latest_elo_file.split("/")[-1],
        repo_type="space",
    )

    # NOTE(security): unpickling can execute arbitrary code embedded in the
    # file. This is only acceptable as long as the lmsys Space above is
    # trusted; do not point this at data from an untrusted source.
    with open(latest_elo_file_local, "rb") as fin:
        elo_results = pickle.load(fin)

    # Keep only the categories that are present in this results file, keyed
    # by their display name.
    arena_dfs = {
        category_name: elo_results[key]["leaderboard_table_df"]
        for key, category_name in key_to_category_name.items()
        if key in elo_results
    }

    # gather open llm leaderboard data
    LEADERBOARD_DATA_FILES = "spaces/lmsys/chatbot-arena-leaderboard/*.csv"
    leaderboard_files = fs.glob(LEADERBOARD_DATA_FILES)
    latest_leaderboard_file = _latest_by_date(
        leaderboard_files, LEADERBOARD_DATA_FILES
    )
    latest_leaderboard_file_local = hf_hub_download(
        repo_id="lmsys/chatbot-arena-leaderboard",
        filename=latest_leaderboard_file.split("/")[-1],
        repo_type="space",
    )
    leaderboard_df = pd.read_csv(latest_leaderboard_file_local)

    ###################
    ### Prepare Data
    ###################

    # merge leaderboard data with ELO data
    # The arena table's index is matched against the leaderboard "key"
    # column. pd.merge defaults to an inner join, so rows without a match on
    # either side are dropped. Result is ordered by rating, highest first.
    merged_dfs = {
        category_name: (
            pd.merge(arena_df, leaderboard_df, left_index=True, right_on="key")
            .sort_values("rating", ascending=False)
            .reset_index(drop=True)
        )
        for category_name, arena_df in arena_dfs.items()
    }

    # add release dates into the merged data
    # The JSON path is relative, so it is resolved against the process's
    # current working directory.
    release_date_mapping = pd.read_json("release_date_mapping.json", orient="records")
    release_dates = release_date_mapping[["key", "Release Date"]]
    # Inner join again: rows whose "key" has no release date are dropped.
    merged_dfs = {
        category_name: pd.merge(merged_df, release_dates, on="key")
        for category_name, merged_df in merged_dfs.items()
    }
# Only the "Overall" category is plotted by this block.
df = merged_dfs["Overall"]

# Pad the y-axis by 10% of the rating spread on each side so the extreme
# points do not sit on the plot border.
y_min = df["rating"].min()
y_max = df["rating"].max()
y_span = y_max - y_min
# When every rating is identical the spread is 0, which would collapse the
# axis range to a single value; fall back to a fixed pad in that case.
y_buffer = y_span * 0.1 if y_span > 0 else 1.0

with gr.Blocks() as demo:
    gr.Markdown("# Chatbot Arena Leaderboard")
    with gr.Row():
        # NOTE(review): title="hello" looks like a placeholder; left as-is
        # here because the intended user-facing title is not known.
        gr.ScatterPlot(
            df,
            title="hello",
            x="Release Date",
            y="rating",
            tooltip=["Model", "rating", "num_battles", "Organization", "License"],
            width=1000,
            height=700,
            x_label_angle=-45,
            y_lim=[y_min - y_buffer, y_max + y_buffer],
        )
# Start the Gradio server only when this file is run as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    demo.launch()