import os
from email.utils import parseaddr

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from datasets import load_dataset
from huggingface_hub import HfApi

# InfoStrings
from content import *

BALM_TOKEN = os.environ.get("BALM_TOKEN", None)

owner = "clefourrier"  # change to balm once possible
api = HfApi()

# Load the dev-set results for each of the three levels
eval_results = {}
for level in range(1, 4):
    eval_results[level] = load_dataset(f"{owner}/BALM_ResultsLevel{level}", use_auth_token=BALM_TOKEN, split="dev")

# Drop the contact email column before displaying results
eval_dataframe_1 = pd.DataFrame(eval_results[1].remove_columns("mail"))
eval_dataframe_2 = pd.DataFrame(eval_results[2].remove_columns("mail"))
eval_dataframe_3 = pd.DataFrame(eval_results[3].remove_columns("mail"))


def restart_space():
    api.restart_space(repo_id=f"{owner}/BALM_Leaderboard", token=BALM_TOKEN)


COLS = ["Model", "Organisation", "Reported accuracy ⬆️"]
TYPES = ["str", "str", "number"]


def add_new_eval(
    level_of_dev: str,
    model: str,
    score: float,
    organisation: str,
    mail: str,
):
    level = int(level_of_dev.split(" ")[-1])

    # Very basic email parsing
    _, parsed_mail = parseaddr(mail)
    if "@" not in parsed_mail:
        valid_mail = "Please provide a valid email address."
        return f"""

{valid_mail}

" print("Adding new eval") # Check if the combination model/org already exists and prints a warning message if yes if model.lower() in set(eval_results[level]["model"]) and organisation.lower() in set(eval_results[level]["organisation"]): duplicate_request_message = "This model has been already submitted." return f"

{duplicate_request_message}

" # Actual submission eval_entry = { "model": model, "score": score, "organisation": organisation, "mail": mail, } eval_results[level] = eval_results[level].add_item(eval_entry) eval_results[level].push_to_hub(f"{owner}/BALM_ResultsLevel{level}", token=BALM_TOKEN, split="dev") success_message = f"Model {model} submitted by {organisation}." return f"

{success_message}

" def refresh(): eval_results = {} for level in range(1, 4): eval_results[level] = load_dataset(f"{owner}/BALM_ResultsLevel{level}", use_auth_token=BALM_TOKEN, split="dev") eval_dataframe_1 = pd.DataFrame(eval_results[1].remove_columns("mail")) eval_dataframe_2 = pd.DataFrame(eval_results[2].remove_columns("mail")) eval_dataframe_3 = pd.DataFrame(eval_results[3].remove_columns("mail")) return eval_dataframe_1, eval_dataframe_2, eval_dataframe_3 custom_css = """ #changelog-text { font-size: 16px !important; } #changelog-text h2 { font-size: 18px !important; } .markdown-text { font-size: 16px !important; } #citation-button span { font-size: 16px !important; } #citation-button textarea { font-size: 16px !important; } #citation-button > label > button { margin: 6px; transform: scale(1.3); } """ demo = gr.Blocks(css=custom_css) with demo: gr.HTML(TITLE) gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text") with gr.Row(): with gr.Column(): with gr.Accordion("📙 Citation", open=False): citation_button = gr.Textbox( value=CITATION_BUTTON_TEXT, label=CITATION_BUTTON_LABEL, elem_id="citation-button", ).style(show_copy_button=True) with gr.Column(): with gr.Accordion("✨ CHANGELOG", open=False): changelog = gr.Markdown(CHANGELOG_TEXT, elem_id="changelog-text") with gr.Tab("Results: Level 1"): with gr.Tab("Results on Dev Set"): leaderboard_table_1 = gr.components.Dataframe( value=eval_dataframe_1, headers=COLS, datatype=TYPES, max_rows=20 ) with gr.Tab("Results on Test Set"): gr.Textbox(label="Info", value="The test set is currently private! Come back when performances on the dev set increased!") with gr.Tab("Results: Level 2"): with gr.Tab("Results on Dev Set"): leaderboard_table_2 = gr.components.Dataframe( value=eval_dataframe_2, headers=COLS, datatype=TYPES, max_rows=20 ) with gr.Tab("Results on Test Set"): gr.Textbox(label="Info", value="The test set is currently private! Come back when performances on the dev set increased!") with gr.Tab("Results: Level 3"): with gr.Tab("Results on Dev Set"): leaderboard_table_3 = gr.components.Dataframe( value=eval_dataframe_3, headers=COLS, datatype=TYPES, max_rows=20 ) with gr.Tab("Results on Test Set"): gr.Textbox(label="Info", value="The test set is currently private! Come back when performances on the dev set increased!") refresh_button = gr.Button("Refresh") refresh_button.click( refresh, inputs=[], outputs=[ leaderboard_table_1, leaderboard_table_2, leaderboard_table_3, ], ) with gr.Accordion("Submit a new model for evaluation"): with gr.Row(): with gr.Column(): level_of_dev = gr.Radio(["Level 1", "Level 2", "Level 3"], value="Level 1", label="Dev set") model_name_textbox = gr.Textbox(label="Model name") score = gr.Textbox(label="Score") with gr.Column(): organisation = gr.Textbox(label="Organisation") mail = gr.Textbox(label="Contact email") submit_button = gr.Button("Submit Eval") submission_result = gr.Markdown() submit_button.click( add_new_eval, [ level_of_dev, model_name_textbox, score, organisation, mail ], submission_result, ) scheduler = BackgroundScheduler() scheduler.add_job(restart_space, "interval", seconds=3600) scheduler.start() demo.launch()