# app.py (3.69 kB) -- last commit 299bf86 "Update app.py" by b1sheng
# (web file-viewer header; "raw" / "history blame" were page links)
import json
import os
from datetime import datetime, timezone
import gradio as gr
import numpy as np
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi
from transformers import AutoConfig
from src.auto_leaderboard.get_model_metadata import apply_metadata
from src.assets.text_content import *
from src.auto_leaderboard.load_results import get_eval_results_dicts, make_clickable_model
from src.assets.hardcoded_evals import gpt4_values, gpt35_values, baseline
from src.assets.css_html_js import custom_css, get_window_url_params
from src.utils_display import AutoEvalColumn, EvalQueueColumn, fields, styled_error, styled_warning, styled_message
from src.init import get_all_requested_models, load_all_info_from_hub
def get_leaderboard_df():
    """Build the hard-coded leaderboard table.

    The first column, ``Datasets``, holds the row labels: a ``metrics`` row
    followed by one row per system. Every other column is one dataset; its
    first cell names the metric (``Acc`` or ``F1``) and the remaining cells
    are that system's score. All cells are strings, and ``'-'`` marks a
    missing score.

    Returns:
        A new ``pandas.DataFrame`` with 9 rows and 9 columns.
    """
    row_labels = [
        'metrics', 'SOTA(FT)', 'SOTA(ZS)', 'FLAN-T5', 'GPT-3',
        'GPT-3.5v2', 'GPT-3.5v3', 'ChatGPT', 'GPT-4',
    ]
    # One entry per dataset column, aligned with ``row_labels``.
    per_dataset = {
        'KQApro': ['Acc', '93.85', '94.20', '37.27', '38.28', '38.01', '40.35', '47.93', '57.20'],
        'LC-quad2': ['F1', '33.10', '-', '30.14', '33.04', '33.77', '39.04', '42.76', '54.95'],
        'WQSP': ['Acc', '73.10', '62.98', '59.87', '67.68', '72.34', '79.60', '83.70', '90.45'],
        'CWQ': ['Acc', '72.20', '-', '46.69', '51.77', '53.96', '57.54', '64.02', '71.00'],
        'GrailQA': ['Acc', '76.31', '-', '29.02', '27.58', '30.50', '35.43', '46.77', '51.40'],
        'GraphQ': ['Acc', '41.30', '-', '32.27', '38.32', '40.85', '47.95', '53.10', '63.20'],
        'QALD-9': ['F1', '67.82', '-', '30.17', '38.54', '44.96', '46.19', '45.71', '57.20'],
        'MKQA': ['Acc', '46.00', '-', '20.17', '26.97', '30.14', '39.05', '44.30', '59.20'],
    }
    # Dict insertion order fixes the column order: labels first, then datasets.
    return pd.DataFrame({'Datasets': row_labels, **per_dataset})
# Unfiltered table; it backs the hidden Dataframe that searches filter from.
original_df = get_leaderboard_df()
# Independent copy used as the initial value of the visible leaderboard table.
leaderboard_df = original_df.copy(deep=True)
def search_table(df, query):
    """Return the rows of ``df`` in which any cell contains ``query``.

    Matching is a case-insensitive, literal substring test (no regex) against
    the string form of every cell.

    Args:
        df: ``pandas.DataFrame`` to filter; it is not modified.
        query: Search text. ``None``, an empty string, or a whitespace-only
            string means "no filter". Leading and trailing whitespace is
            ignored.

    Returns:
        ``df`` itself when there is no filter, otherwise a new DataFrame with
        the matching rows and all original columns (possibly zero rows).
    """
    needle = (query or "").strip().lower()
    if not needle:
        return df

    # Stringify once, then test column by column. ``regex=False`` keeps
    # characters such as "(" or "." in the query literal.
    as_text = df.astype(str)
    cell_hits = as_text.apply(
        lambda column: column.str.lower().str.contains(needle, regex=False)
    )
    # Keep a row when at least one of its cells matched.
    row_mask = cell_hits.any(axis=1)
    return df[row_mask]
# Gradio UI: a title and intro, a search bar, a two-tab area (leaderboard and
# "About"), and a collapsible citation box.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Box(elem_id="search-bar-table-box"):
            search_bar = gr.Textbox(
                placeholder="🔍 Search your model and press ENTER...",
                show_label=False,
                elem_id="search-bar",
            )

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=1):
            # Visible table; it is the output target of every search.
            leaderboard_table = gr.components.Dataframe(
                value=leaderboard_df,
                max_rows=None,
                elem_id="leaderboard-table",
            )

            # Hidden copy of the full table. Searches read from this one, not
            # from the visible table, so editing the query (e.g. with
            # backspace) and resubmitting filters the complete data again
            # instead of the previous result.
            hidden_leaderboard_table_for_search = gr.components.Dataframe(
                value=original_df,
                max_rows=None,
                visible=False,
            )

            # Pressing ENTER in the search bar runs search_table(hidden, query)
            # and writes the result into the visible table.
            search_bar.submit(
                search_table,
                [hidden_leaderboard_table_for_search, search_bar],
                leaderboard_table,
            )

        with gr.TabItem("About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
            ).style(show_copy_button=True)

# Enable request queuing, then start the app.
demo.queue(concurrency_count=40).launch()