"""Gradio app that renders the DomainEval leaderboard from ``results.json``."""

import json

import gradio as gr
import pandas as pd

from css_html import custom_css
from text_content import (
    ACKNOWLEDGEMENT_TEXT,
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    HEAD_TEXT,
    NOTES_TEXT,
)
from utils import (
    AutoEvalColumn,
    fields,
)

result_path = './results.json'
task_type = [
    "Pass@1 (Greedy Search N=1 Temperature=0.0)",
    "Pass@5 (Sampling Search N=5 Temperature=0.2)",
]
# Substring used to pick the radio option that is selected on start-up.
cur_task = "Pass@1"
next_task = "Pass@5"


def data_convert(data_pass_k: list) -> pd.DataFrame:
    """Pivot flat result records into a model-by-domain score table.

    Args:
        data_pass_k: Records carrying the keys ``"Model"``, ``"Domain"`` and
            ``"Pass_at_k"``. Each ``"Pass_at_k"`` value is multiplied by 100
            and rounded to two decimals.

    Returns:
        A DataFrame with a ``"Model"`` column plus one column per domain,
        sorted by the ``"Mean"`` column in descending order.

    Raises:
        ValueError: If the same (model, domain) pair occurs more than once.
        KeyError: If no record uses the domain ``"Mean"`` (raised by the
            final sort).
    """
    table = {"Model": {}}
    for item in data_pass_k:
        model_name = item["Model"]
        domain = item["Domain"]
        scores = table.setdefault(domain, {})
        # Explicit raise instead of ``assert`` so the check survives ``python -O``.
        if model_name in scores:
            raise ValueError(
                f"duplicate result for model {model_name!r} in domain {domain!r}"
            )
        scores[model_name] = round(item["Pass_at_k"] * 100, 2)
        table["Model"][model_name] = model_name
    return pd.DataFrame(table).sort_values(by="Mean", ascending=False)


# JSON text is UTF-8; do not depend on the platform's default encoding.
with open(result_path, "r", encoding="utf-8") as f:
    data = json.load(f)

df = data_convert(data["pass_1"])
df_next = data_convert(data["pass_5"])

COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
COLS_LITE = [
    c.name
    for c in fields(AutoEvalColumn)
    if c.displayed_by_default and not c.hidden
]
TYPES_LITE = [
    c.type
    for c in fields(AutoEvalColumn)
    if c.displayed_by_default and not c.hidden
]


def _filter_columns(df, columns):
    """Return *df* reduced to the model column plus the requested columns."""
    always_here_cols = [
        AutoEvalColumn.model.name,
    ]
    # We use COLS to maintain sorting
    return df[
        always_here_cols
        + [c for c in COLS if c in df.columns and c in columns]
    ]


def select_columns(df, columns):
    """Return *df* restricted to the model column and the ticked *columns*.

    Args:
        df: Full leaderboard table.
        columns: Column names currently selected in the checkbox group.

    Returns:
        The filtered table, with columns in ``COLS`` order.
    """
    return _filter_columns(df, columns)


def select_tasks(df, columns, df_next):
    """Swap the active and standby tables and rebuild the visible table.

    The handler does not look at which radio option was chosen: every call
    exchanges *df* and *df_next*, so it behaves as a toggle between the two
    tables.

    Args:
        df: Table that was active before the switch.
        columns: Column names currently selected in the checkbox group.
        df_next: Table that becomes active.

    Returns:
        A tuple ``(active, visible, standby)`` where ``active`` is the former
        *df_next*, ``visible`` is ``active`` filtered to *columns*, and
        ``standby`` is the former *df*.
    """
    active, standby = df_next, df
    return active, _filter_columns(active, columns), standby


demo = gr.Blocks(css=custom_css)
# NOTE(review): the nesting of the layout containers below was rebuilt from a
# flattened source; confirm it against the rendered page.
with demo:
    with gr.Column():
        # The literal stays left-aligned because its content is runtime text.
        # The final backslash is doubled so the literal no longer contains the
        # invalid escape sequence "\ "; the resulting text is unchanged.
        gr.Markdown(
            """

DomainEval Leaderboard

\
\\ """,
            elem_classes="markdown-text",
        )
        gr.Markdown(HEAD_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.Column():
            with gr.Tabs(elem_classes="A100-tabs") as A100_tabs:
                with gr.TabItem("🔍 Evaluation Table", id=0):
                    with gr.Column():
                        with gr.Accordion("⏬ Pass@k", open=True):
                            shown_tasks = gr.Radio(
                                choices=list(task_type),
                                # First option mentioning cur_task, or None.
                                value=next(
                                    (c for c in task_type if cur_task in c),
                                    None,
                                ),
                                label="",
                                elem_id="task-select",
                                interactive=True,
                            )

                        with gr.Accordion("⏬ Domains", open=True):
                            shown_languages = gr.CheckboxGroup(
                                choices=[
                                    c
                                    for c in COLS
                                    if c != AutoEvalColumn.model.name
                                ],
                                value=[
                                    c
                                    for c in COLS_LITE
                                    if c != AutoEvalColumn.model.name
                                ],
                                label="",
                                elem_id="column-select",
                                interactive=True,
                            )

                        # Visible table: model column plus the ticked domains.
                        leaderboard_df = gr.components.Dataframe(
                            value=df[
                                [
                                    AutoEvalColumn.model.name,
                                ]
                                + shown_languages.value
                            ],
                            headers=COLS,
                            datatype=TYPES,
                            elem_id="leaderboard-table",
                            interactive=False,
                        )

                        # Invisible copy of the full table for the active task.
                        hidden_leaderboard_df = gr.components.Dataframe(
                            value=df,
                            headers=COLS,
                            datatype=["str"] * len(COLS),
                            visible=False,
                        )

                        # Invisible full table for the task that is not shown.
                        leaderboard_next = gr.components.Dataframe(
                            value=df_next,
                            headers=COLS,
                            datatype=["str"] * len(COLS),
                            visible=False,
                        )

                        shown_languages.change(
                            select_columns,
                            [hidden_leaderboard_df, shown_languages],
                            leaderboard_df,
                        )

                        # Switching the task swaps the two hidden tables and
                        # refreshes the visible one.
                        shown_tasks.change(
                            select_tasks,
                            [
                                hidden_leaderboard_df,
                                shown_languages,
                                leaderboard_next,
                            ],
                            [
                                hidden_leaderboard_df,
                                leaderboard_df,
                                leaderboard_next,
                            ],
                        )

                        gr.Markdown(NOTES_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=10,
                elem_id="citation-button",
                show_copy_button=True,
            )

    with gr.Row():
        with gr.Accordion("🙏 Acknowledgement", open=False):
            gr.Markdown(ACKNOWLEDGEMENT_TEXT)

demo.launch()