"""Chatbot Arena leaderboard demo.

Downloads the latest ELO results (.pkl) and leaderboard table (.csv) from the
``lmsys/chatbot-arena-leaderboard`` Hugging Face space, merges them per
category with model release dates, and renders a rating-vs-release-date
scatter plot with Gradio.
"""

import pickle

import gradio as gr
import pandas as pd
from huggingface_hub import HfFileSystem, hf_hub_download

# Data loading is guarded by gr.NO_RELOAD so it runs once, not on every
# hot-reload of the UI code.
if gr.NO_RELOAD:
    ###################
    ### Load Data
    ###################
    # Raw category key (as stored in the ELO pickle) -> display name.
    key_to_category_name = {
        "full": "Overall",
        "coding": "Coding",
        "long_user": "Longer Query",
        "english": "English",
        "chinese": "Chinese",
        "french": "French",
        "no_tie": "Exclude Ties",
        "no_short": "Exclude Short Query (< 5 tokens)",
        "no_refusal": "Exclude Refusal",
    }
    # Display name -> human-readable explanation of the category filter.
    cat_name_to_explanation = {
        "Overall": "Overall Questions",
        "Coding": "Coding: whether conversation contains code snippets",
        "Longer Query": "Longer Query (>= 500 tokens)",
        "English": "English Prompts",
        "Chinese": "Chinese Prompts",
        "French": "French Prompts",
        "Exclude Ties": "Exclude Ties and Bothbad",
        "Exclude Short Query (< 5 tokens)": "Exclude Short User Query (< 5 tokens)",
        "Exclude Refusal": 'Exclude model responses with refusal (e.g., "I cannot answer")',
    }

    fs = HfFileSystem()

    def extract_date(filename):
        """Return the trailing date token of a data file path.

        e.g. "spaces/.../elo_results_20240101.pkl" -> "20240101".
        Dates are zero-padded YYYYMMDD, so string comparison orders them
        chronologically.
        """
        return filename.split("/")[-1].split(".")[0].split("_")[-1]

    # Gather ELO data: pick the newest .pkl by its date suffix.
    ELO_DATA_FILES = "spaces/lmsys/chatbot-arena-leaderboard/*.pkl"
    elo_files = fs.glob(ELO_DATA_FILES)
    latest_elo_file = max(elo_files, key=extract_date)
    latest_elo_file_local = hf_hub_download(
        repo_id="lmsys/chatbot-arena-leaderboard",
        filename=latest_elo_file.split("/")[-1],
        repo_type="space",
    )
    # NOTE(review): unpickling downloaded content executes arbitrary code if
    # the upstream space were ever compromised; data comes from the trusted
    # lmsys space, but a safer serialization format would remove the risk.
    with open(latest_elo_file_local, "rb") as fin:
        elo_results = pickle.load(fin)

    # Display name -> per-category arena leaderboard table, for the
    # categories actually present in this ELO dump.
    arena_dfs = {
        category: elo_results[key]["leaderboard_table_df"]
        for key, category in key_to_category_name.items()
        if key in elo_results
    }

    # Gather open LLM leaderboard data: pick the newest .csv by date suffix.
    LEADERBOARD_DATA_FILES = "spaces/lmsys/chatbot-arena-leaderboard/*.csv"
    leaderboard_files = fs.glob(LEADERBOARD_DATA_FILES)
    latest_leaderboard_file = max(leaderboard_files, key=extract_date)
    latest_leaderboard_file_local = hf_hub_download(
        repo_id="lmsys/chatbot-arena-leaderboard",
        filename=latest_leaderboard_file.split("/")[-1],
        repo_type="space",
    )
    leaderboard_df = pd.read_csv(latest_leaderboard_file_local)

    ###################
    ### Prepare Data
    ###################
    # Merge each category's arena table (model key in the index) with the
    # leaderboard metadata, best-rated models first.
    merged_dfs = {}
    for category, arena_df in arena_dfs.items():
        merged_dfs[category] = (
            pd.merge(arena_df, leaderboard_df, left_index=True, right_on="key")
            .sort_values("rating", ascending=False)
            .reset_index(drop=True)
        )

    # Attach release dates from the local mapping file.
    release_date_mapping = pd.read_json("release_date_mapping.json", orient="records")
    for category in merged_dfs:
        merged_dfs[category] = pd.merge(
            merged_dfs[category],
            release_date_mapping[["key", "Release Date"]],
            on="key",
        )

    df = merged_dfs["Overall"]

    # Pad the y-axis by 10% of the rating spread so points don't sit on the
    # plot edge.
    y_min = df["rating"].min()
    y_max = df["rating"].max()
    y_buffer = (y_max - y_min) * 0.1

with gr.Blocks() as demo:
    gr.Markdown("# Chatbot Arena Leaderboard")
    with gr.Row():
        gr.ScatterPlot(
            df,
            title="hello",  # TODO(review): placeholder chart title — confirm intended text
            x="Release Date",
            y="rating",
            tooltip=["Model", "rating", "num_battles", "Organization", "License"],
            width=1000,
            height=700,
            x_label_angle=-45,
            y_lim=[y_min - y_buffer, y_max + y_buffer],
        )

if __name__ == "__main__":
    demo.launch()