import pickle
import pandas as pd
import gradio as gr
from huggingface_hub import HfFileSystem, hf_hub_download
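
# load and prepare data once at startup; gr.NO_RELOAD skips this block on hot reload during development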
if gr.NO_RELOAD:
    ###################
    ### Load Data
    ###################
    key_to_category_name = {
        "full": "Overall",
        "coding": "Coding",
        "long_user": "Longer Query",
        "english": "English",
        "chinese": "Chinese",
        "french": "French",
        "no_tie": "Exclude Ties",
        "no_short": "Exclude Short Query (< 5 tokens)",
        "no_refusal": "Exclude Refusal",
    }
    cat_name_to_explanation = {
        "Overall": "Overall Questions",
        "Coding": "Coding: whether conversation contains code snippets",
        "Longer Query": "Longer Query (>= 500 tokens)",
        "English": "English Prompts",
        "Chinese": "Chinese Prompts",
        "French": "French Prompts",
        "Exclude Ties": "Exclude Ties and Bothbad",
        "Exclude Short Query (< 5 tokens)": "Exclude Short User Query (< 5 tokens)",
        "Exclude Refusal": 'Exclude model responses with refusal (e.g., "I cannot answer")',
    }
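
    # list result files in the lmsys/chatbot-arena-leaderboard Space via the Hub filesystem API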
    fs = HfFileSystem()

    def extract_date(filename):
        # pull the trailing date token from a filename like ".../elo_results_<YYYYMMDD>.pkl"
        return filename.split("/")[-1].split(".")[0].split("_")[-1]
    # gather ELO data
    ELO_DATA_FILES = "spaces/lmsys/chatbot-arena-leaderboard/*.pkl"
    elo_files = fs.glob(ELO_DATA_FILES)
    latest_elo_file = sorted(elo_files, key=extract_date, reverse=True)[0]
    latest_elo_file_local = hf_hub_download(
        repo_id="lmsys/chatbot-arena-leaderboard",
        filename=latest_elo_file.split("/")[-1],
        repo_type="space",
    )
    with open(latest_elo_file_local, "rb") as fin:
        elo_results = pickle.load(fin)
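
    # keep one leaderboard DataFrame per category present in the Elo results pickle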
    arena_dfs = {}
    for k in key_to_category_name.keys():
        if k not in elo_results:
            continue
        arena_dfs[key_to_category_name[k]] = elo_results[k]["leaderboard_table_df"]
    # gather leaderboard table data (model metadata such as organization and license)
    LEADERBOARD_DATA_FILES = "spaces/lmsys/chatbot-arena-leaderboard/*.csv"
    leaderboard_files = fs.glob(LEADERBOARD_DATA_FILES)
    latest_leaderboard_file = sorted(leaderboard_files, key=extract_date, reverse=True)[0]
    latest_leaderboard_file_local = hf_hub_download(
        repo_id="lmsys/chatbot-arena-leaderboard",
        filename=latest_leaderboard_file.split("/")[-1],
        repo_type="space",
    )
    leaderboard_df = pd.read_csv(latest_leaderboard_file_local)
    ###################
    ### Prepare Data
    ###################
    # merge leaderboard data with ELO data
    merged_dfs = {}
    for k, v in arena_dfs.items():
        merged_dfs[k] = (
            pd.merge(arena_dfs[k], leaderboard_df, left_index=True, right_on="key")
            .sort_values("rating", ascending=False)
            .reset_index(drop=True)
        )
    # add release dates into the merged data
    release_date_mapping = pd.read_json("release_date_mapping.json", orient="records")
    for k, v in merged_dfs.items():
        merged_dfs[k] = pd.merge(
            merged_dfs[k], release_date_mapping[["key", "Release Date"]], on="key"
        )
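
    # use the "Overall" category for the plot and pad the y-axis by 10% of the rating range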
    df = merged_dfs["Overall"]
    y_min = df["rating"].min()
    y_max = df["rating"].max()
    y_buffer = (y_max - y_min) * 0.1
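
# build the Gradio UI: a scatter plot of Arena Elo rating vs. model release date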
with gr.Blocks() as demo:
    gr.Markdown("# Chatbot Arena Leaderboard")
    with gr.Row():
        gr.ScatterPlot(
            df,
            title="Elo rating vs. release date",
            x="Release Date",
            y="rating",
            tooltip=["Model", "rating", "num_battles", "Organization", "License"],
            width=1000,
            height=700,
            x_label_angle=-45,
            y_lim=[y_min - y_buffer, y_max + y_buffer],
        )
if __name__ == "__main__":
    demo.launch()