"""Gradio app for comparing the results of two Open LLM Leaderboard models.

The script builds a ``gr.Blocks`` UI with two model dropdowns and three tabs
("Results", "Configs", "Details"), wires the components to the handlers
imported from ``src.results`` and ``src.details``, and launches the app.
"""
from functools import partial

import gradio as gr

from src.constants import SUBTASKS, TASKS
from src.details import (
    clear_details,
    display_details,
    load_details_dataframes,
    update_load_details_component,
    update_sample_idx_component,
    update_subtasks_component,
)
from src.results import (
    clear_results,
    display_results,
    fetch_result_paths,
    load_results_dataframes,
    sort_result_paths_per_model,
    update_load_results_component,
    update_tasks_component,
)


# NOTE: the `if __name__ == "__main__":` guard is disabled, so everything
# below (including `demo.launch()`) runs at module import time.
# if __name__ == "__main__":
result_paths_per_model = sort_result_paths_per_model(fetch_result_paths())

# Pre-bind the result paths so the event system only has to supply the two
# selected model ids as inputs.
load_results_dataframes = partial(
    load_results_dataframes,
    result_paths_per_model=result_paths_per_model,
)


def _model_dropdown():
    """Create a "Models" dropdown listing every key of ``result_paths_per_model``."""
    return gr.Dropdown(
        choices=[*result_paths_per_model.keys()],
        label="Models",
    )


def _task_filter_radio():
    """Create an initially hidden "Tasks" radio: "All" plus every ``TASKS`` value."""
    return gr.Radio(
        ["All", *TASKS.values()],
        label="Tasks",
        info="Evaluation tasks to be displayed",
        value="All",
        visible=False,
    )


with gr.Blocks(fill_height=True, fill_width=True) as demo:
    # ------------------------------------------------------------------
    # Header
    # ------------------------------------------------------------------
    # NOTE(review): these two HTML snippets carry no tags in this view, only
    # text surrounded by blank lines; confirm whether heading markup was lost.
    gr.HTML("\n\nCompare Results of the 🤗 Open LLM Leaderboard\n\n")
    gr.HTML("\n\nSelect 2 models to load and compare their results\n\n")
    gr.Markdown(
        "Compare Results of the 🤗 [Open LLM Leaderboard](https://huggingface.co/spaces/open-llm-leaderboard-old/open_llm_leaderboard). "
        "Check out the [documentation](https://huggingface.co/docs/leaderboards/open_llm_leaderboard/about) 📄 to find explanations on the evaluations used, their configuration parameters and details on the input/outputs for the models."
    )

    # ------------------------------------------------------------------
    # Model selection: one column per model, each with a hidden dataframe
    # that holds the loaded results for that model.
    # ------------------------------------------------------------------
    with gr.Row():
        with gr.Column():
            model_id_1 = _model_dropdown()
            dataframe_1 = gr.Dataframe(visible=False)
        with gr.Column():
            model_id_2 = _model_dropdown()
            dataframe_2 = gr.Dataframe(visible=False)

    # ------------------------------------------------------------------
    # Tabs
    # ------------------------------------------------------------------
    with gr.Row():
        with gr.Tab("Results"):
            load_results_btn = gr.Button("Load", interactive=False)
            clear_results_btn = gr.Button("Clear")
            results_task = _task_filter_radio()
            results = gr.HTML()

        with gr.Tab("Configs"):
            load_configs_btn = gr.Button("Load", interactive=False)
            clear_configs_btn = gr.Button("Clear")
            configs_task = _task_filter_radio()
            configs = gr.HTML()

        with gr.Tab("Details"):
            details_task = gr.Radio(
                list(TASKS.values()),
                label="Tasks",
                info="Evaluation tasks to be loaded",
                interactive=True,
            )
            # No task is selected yet, so the subtask choices come from
            # whatever SUBTASKS.get() yields for the radio's initial value.
            subtask = gr.Radio(
                SUBTASKS.get(details_task.value),
                label="Subtasks",
                info="Evaluation subtasks to be loaded (choose one of the Tasks above)",
            )
            load_details_btn = gr.Button("Load Details", interactive=False)
            clear_details_btn = gr.Button("Clear Details")
            sample_idx = gr.Number(
                label="Sample Index",
                info="Index of the sample to be displayed",
                value=0,
                minimum=0,
                visible=False,
            )
            details = gr.HTML()
            # Hidden holders for the per-model details data.
            details_dataframe_1 = gr.Dataframe(visible=False)
            details_dataframe_2 = gr.Dataframe(visible=False)
            details_dataframe = gr.DataFrame(visible=False)

    # ------------------------------------------------------------------
    # Event wiring: Results / Configs
    # ------------------------------------------------------------------
    model_selected = [model_id_1.input, model_id_2.input]
    load_clicked = [load_results_btn.click, load_configs_btn.click]
    clear_clicked = [clear_results_btn.click, clear_configs_btn.click]

    # Any user change to a model dropdown refreshes both "Load" buttons.
    gr.on(
        triggers=model_selected,
        fn=update_load_results_component,
        outputs=[load_results_btn, load_configs_btn],
    )

    # Either "Load" button fills the hidden dataframes, then the task radios
    # of both tabs are updated.
    gr.on(
        triggers=load_clicked,
        fn=load_results_dataframes,
        inputs=[model_id_1, model_id_2],
        outputs=[dataframe_1, dataframe_2],
    ).then(
        fn=update_tasks_component,
        outputs=[results_task, configs_task],
    )

    # Synchronize the results_task and configs_task radio buttons
    results_task.input(fn=lambda task: task, inputs=results_task, outputs=configs_task)
    configs_task.input(fn=lambda task: task, inputs=configs_task, outputs=results_task)

    # Re-render both the results and configs views whenever the loaded data
    # or the selected task changes.
    gr.on(
        triggers=[dataframe_1.change, dataframe_2.change, results_task.change],
        fn=display_results,
        inputs=[results_task, dataframe_1, dataframe_2],
        outputs=[results, configs],
    )

    # Either "Clear" button resets the model selection, the loaded data,
    # both "Load" buttons and both task radios.
    gr.on(
        triggers=clear_clicked,
        fn=clear_results,
        outputs=[
            model_id_1,
            model_id_2,
            dataframe_1,
            dataframe_2,
            load_results_btn,
            load_configs_btn,
            results_task,
            configs_task,
        ],
    )

    # ------------------------------------------------------------------
    # Event wiring: Details
    # ------------------------------------------------------------------
    # Changing the task updates the subtask radio.
    details_task.change(
        fn=update_subtasks_component,
        inputs=details_task,
        outputs=subtask,
    )

    # The "Load Details" button is refreshed on any user change to the
    # models, the subtask or the task.
    gr.on(
        triggers=[*model_selected, subtask.input, details_task.input],
        fn=update_load_details_component,
        inputs=[model_id_1, model_id_2, subtask],
        outputs=load_details_btn,
    )

    # Load the details for both models, then update the sample index input
    # from the loaded data.
    load_details_btn.click(
        fn=load_details_dataframes,
        inputs=[subtask, model_id_1, model_id_2],
        outputs=[details_dataframe_1, details_dataframe_2],
    ).then(
        fn=update_sample_idx_component,
        inputs=[details_dataframe_1, details_dataframe_2],
        outputs=sample_idx,
    )

    # Re-render the details view whenever the loaded details or the sample
    # index changes.
    gr.on(
        triggers=[details_dataframe_1.change, details_dataframe_2.change, sample_idx.change],
        fn=display_details,
        inputs=[sample_idx, details_dataframe_1, details_dataframe_2],
        outputs=details,
    )

    # "Clear Details" resets the model selection and every Details-tab input.
    clear_details_btn.click(
        fn=clear_details,
        outputs=[
            model_id_1,
            model_id_2,
            details_dataframe_1,
            details_dataframe_2,
            details_task,
            subtask,
            load_details_btn,
            sample_idx,
        ],
    )

demo.launch()