Spaces:
Running
Running
File size: 5,408 Bytes
6906870 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import json
import gradio as gr
import pandas as pd
from css_html import custom_css
from text_content import ABOUT_TEXT, CITATION_BUTTON_TEXT, CITATION_BUTTON_LABEL, ACKNOWLEDGEMENT_TEXT, NOTES_TEXT
from utils import (
AutoEvalColumn,
fields,
)
# Path of the benchmark results file. The loops below read it as a mapping of
# model name -> result category -> {metric name: value}.
result_path = './RESULTS.json'

# Emoji prefix attached to every metric of a result category.
# NOTE(review): these glyphs were mis-encoded in the reviewed copy of this
# file and have been reconstructed; confirm each one matches the column names
# declared on utils.AutoEvalColumn, otherwise column selection will not line up.
_CATEGORY_SYMBOLS = {
    'readability': '📖',
    'maintainability': '🔨',
    'efficiency': '🚀',
    'correctness': '✅',
    'overall': '🎯',
}

# Decode explicitly as UTF-8 so the file reads the same on every platform
# instead of depending on the locale's default encoding.
with open(result_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Flatten the nested results into one row per model, with one
# "<symbol> <metric>" column per metric.
rows = []
for col, subcols in data.items():
    row = {"model": col}
    for subcol, datas in subcols.items():
        # An unrecognised category falls back to its own name as the prefix,
        # rather than hitting an unbound `symbol` or silently reusing the
        # symbol left over from the previous category.
        symbol = _CATEGORY_SYMBOLS.get(subcol, subcol)
        for key, value in datas.items():
            row[f'{symbol} {key}'] = value
    rows.append(row)

df = pd.DataFrame(rows)
# Highest RACE Score first.
df = df.sort_values(by='🎯 RACE Score', ascending=False)
# Column metadata derived from AutoEvalColumn: every field that is not hidden,
# and the subset of those that is displayed by default.
_visible_fields = [c for c in fields(AutoEvalColumn) if not c.hidden]
_default_fields = [c for c in _visible_fields if c.displayed_by_default]

# Names and types of all non-hidden columns.
COLS = [c.name for c in _visible_fields]
TYPES = [c.type for c in _visible_fields]

# Names and types of the non-hidden columns shown by default.
COLS_LITE = [c.name for c in _default_fields]
TYPES_LITE = [c.type for c in _default_fields]
def select_columns(df, columns):
    """Return ``df`` narrowed to the model column plus the requested columns.

    Args:
        df: Table to select from; it is indexed with a list of column names
            and must expose ``.columns``.
        columns: Names of the optional columns to keep.

    Returns:
        ``df`` restricted to the model column followed by every entry of
        ``COLS`` that is both present in ``df`` and listed in ``columns``.
    """
    # The model column is always kept, and always comes first.
    selected = [AutoEvalColumn.model.name]
    # Walk COLS (not ``columns``) so the result keeps the COLS ordering.
    for name in COLS:
        if name in df.columns and name in columns:
            selected.append(name)
    return df[selected]
# Build and launch the leaderboard UI.
# NOTE(review): the reviewed copy of this file had lost its indentation and had
# mis-encoded emoji in the labels. The nesting of the ``with`` blocks and the
# emoji below are reconstructed — confirm both against the deployed app.

# Page header shown above the tabs (rendered as HTML inside a Markdown block).
_HEADER_HTML = (
    '<div style="text-align: center;"><h1> 🏎️RACE Leaderboard</h1></div>'
    '<br>'
    '<p>Based on the 🏎️RACE benchmark, we demonstrated the ability of different LLMs to generate code that is <b><i>correct</i></b> and <b><i>meets the requirements of real-world development scenarios</i></b>.</p>\n'
    '<p>Model details about how to evaluate the LLM are available in the <a href="https://github.com/test/test">🏎️RACE GitHub repository</a>.</p>\n'
)

demo = gr.Blocks(css=custom_css)
with demo:
    with gr.Row():
        gr.Markdown(_HEADER_HTML, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.Column():
            with gr.Tabs(elem_classes="A100-tabs") as A100_tabs:
                with gr.TabItem("🔍 Evaluation Table", id=0):
                    with gr.Column():
                        with gr.Accordion("⏬ Hidden Columns", open=False):
                            # The model column is always shown, so it is not
                            # offered as a toggle.
                            shown_columns = gr.CheckboxGroup(
                                choices=[
                                    c for c in COLS
                                    if c != AutoEvalColumn.model.name
                                ],
                                value=[
                                    c for c in COLS_LITE
                                    if c != AutoEvalColumn.model.name
                                ],
                                label="",
                                elem_id="column-select",
                                interactive=True,
                            )

                        # Visible table. The initial contents go through the
                        # same helper as the checkbox callback so that both
                        # paths select columns by one rule.
                        leaderboard_df = gr.components.Dataframe(
                            value=select_columns(df, shown_columns.value),
                            headers=COLS,
                            datatype=TYPES,
                            elem_id="leaderboard-table",
                            interactive=False,
                        )

                        # Invisible copy of the full table; it is the input
                        # the callback re-selects columns from.
                        hidden_leaderboard_df = gr.components.Dataframe(
                            value=df,
                            headers=COLS,
                            datatype=["str"] * len(COLS),
                            visible=False,
                        )

                        # Refresh the visible table whenever the selection
                        # of columns changes.
                        shown_columns.change(
                            select_columns,
                            [hidden_leaderboard_df, shown_columns],
                            leaderboard_df,
                        )
                        gr.Markdown(NOTES_TEXT, elem_classes="markdown-text")

                with gr.TabItem("📝 About", id=1):
                    gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=10,
                elem_id="citation-button",
                show_copy_button=True,
            )

    with gr.Row():
        with gr.Accordion("🙏 Acknowledgement", open=False):
            gr.Markdown(ACKNOWLEDGEMENT_TEXT)

demo.launch()