CulturalBench / app.py
kellycyy's picture
first init
c76f18f
raw
history blame
3.59 kB
import gradio as gr
import pandas as pd
import os
from datetime import datetime, timezone
import pytz
# Directory that holds the leaderboard CSV files. Each file is named
# "<setup>_<data_type>.csv" (see display_leaderboard).
csv_folder = "data"
# Evaluation setups offered in the UI; the first entry is the default view.
setups = ["CulturalBench_Hard", "CulturalBench_Easy"]
# Table granularities offered in the UI.
data_types = ["average", "per_continent"]

# Static page content. The encoding is pinned to UTF-8 so the markdown is
# decoded the same way regardless of the host's locale settings.
with open("markdown/aboutus.md", "r", encoding="utf-8") as f:
    ABOUT_MD = f.read()
# header.md contains a "{LAST_UPDATED}" placeholder that is substituted later.
with open("markdown/header.md", "r", encoding="utf-8") as f:
    HEADER_MD = f.read()
def make_full_leaderboard_md():
    """Return the markdown intro shown above the leaderboard table.

    The text describes the two evaluation setups, links to the paper and
    ends with the human-baseline accuracy line.

    Returns:
        str: Markdown source, starting with a newline and ending with the
        human-baseline sentence (no trailing newline).
    """
    leaderboard_md = (
        "\n"
        "We evaluate models on the same dataset but in two setups: "
        "CulturalBench-Easy and CulturalBench-Hard.\n"
        "See more detail on our [Paper](https://arxiv.org)\n"
    )
    # Plain string: there is nothing to interpolate, so no f-string is needed.
    leaderboard_md += (
        "Human baseline: 92.4% on CulturalBench-Easy "
        "and 92.5% on CulturalBench-Hard"
    )
    return leaderboard_md
def display_leaderboard(setup, data_type):
    """Load the leaderboard table for one setup / data-type combination.

    Reads ``<csv_folder>/<setup>_<data_type>.csv`` with pandas and returns
    the resulting DataFrame unchanged.

    Args:
        setup: Evaluation setup name used as the file-name prefix.
        data_type: Table granularity used as the file-name suffix.

    Returns:
        The DataFrame parsed from the matching CSV file.
    """
    csv_path = os.path.join(csv_folder, f"{setup}_{data_type}.csv")
    return pd.read_csv(csv_path)
def build_leaderboard():
    """Create the leaderboard controls and table inside the current Blocks context.

    Lays out two radio selectors (evaluation setup and table granularity)
    side by side, a DataFrame viewer initialised with the
    "CulturalBench_Hard" / "average" table, and refreshes the viewer through
    display_leaderboard whenever either selector changes.
    """
    with gr.Row():
        with gr.Column():
            setup_choice = gr.Radio(
                choices=setups,
                label="Select Evaluation Setup:",
                value="CulturalBench_Hard",
                elem_id="setup_radio",
            )
        with gr.Column():
            granularity_choice = gr.Radio(
                choices=data_types,
                label="Average or per continent:",
                value="average",
                elem_id="data_type_radio",
            )

    table = gr.DataFrame(
        value=display_leaderboard("CulturalBench_Hard", "average"),
        elem_id="leaderboard_viewer",
    )

    # Both selectors feed the same callback with the same (setup, data_type)
    # inputs, so a change to either one reloads the table.
    selectors = (setup_choice, granularity_choice)
    for selector in selectors:
        selector.change(
            display_leaderboard,
            inputs=list(selectors),
            outputs=table,
        )
# Timestamp substituted into the header's "{LAST_UPDATED}" placeholder.
# NOTE(review): this is the wall-clock time (US/Pacific) at which the module
# runs, not the modification time of the CSV data - confirm that is intended.
_PACIFIC_TZ = pytz.timezone('US/Pacific')
LAST_UPDATED = datetime.now(_PACIFIC_TZ).strftime("%Y-%m-%d %H:%M:%S")
header_md_text = HEADER_MD.replace("{LAST_UPDATED}", LAST_UPDATED)

# Base theme loaded from the bundled JSON, then customised below.
theme = gr.themes.Default.load("static/theme.json")
text_size = gr.themes.sizes.text_lg
font = [gr.themes.GoogleFont("Inconsolata"), "Arial", "sans-serif"]
theme.text_size = text_size
theme.font = font

# Button / checkbox styling overrides applied on top of the loaded theme.
# Values starting with "*" refer to other theme variables.
_theme_overrides = {
    "button_large_text_size": "30px",
    "button_small_text_size": "30px",
    "button_large_text_weight": "1000",
    "button_small_text_weight": "1000",
    "button_shadow": "*shadow_drop_lg",
    "button_shadow_hover": "*shadow_drop_lg",
    "checkbox_label_shadow": "*shadow_drop_lg",
    "button_shadow_active": "*shadow_inset",
    "button_secondary_background_fill": "*color_green",
    "button_secondary_background_fill_dark": "*color_green",
    "button_secondary_background_fill_hover": "*color_green_darker",
    "button_secondary_background_fill_hover_dark": "*color_green_darker",
    "button_secondary_text_color": "white",
    "button_secondary_text_color_dark": "white",
}
theme.set(**_theme_overrides)
# Top-level page layout: header, divider, then a tab strip with the
# leaderboard and the "About Us" page.
with gr.Blocks(theme=theme) as demo:
    leaderboard_md = make_full_leaderboard_md()

    with gr.Row():
        with gr.Column(scale=4):
            # header_md_text (header markdown with the timestamp already
            # substituted) is prepared once at module level, so it is not
            # rebuilt here.
            gr.Markdown(header_md_text, elem_classes="markdown-text")

    gr.Markdown("<hr style='margin: 20px 0;'>", elem_id="divider")

    with gr.Tabs() as tabs:
        with gr.Tab("🥇 Leaderboard", id=0):
            md_3 = gr.Markdown(leaderboard_md, elem_id="markdown")
            # Adds the selectors and the results table to this tab.
            build_leaderboard()

        with gr.Tab("ℹ️ About Us", id=1):
            gr.Markdown(ABOUT_MD, elem_classes="markdown-text")

# Start the Gradio server for the assembled app.
demo.launch()