display button unification for benchmarks (#28)
- add button unification (670054face4e2bee73a77f83e6141c1a99e09a7c)
- app.py +17 -4
- src/display/utils.py +10 -10
app.py
CHANGED
@@ -89,6 +89,17 @@ def init_space():
         EVAL_REQUESTS_PATH, EVAL_COLS
     )
     return dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
+
+
+def add_benchmark_columns(shown_columns):
+    benchmark_columns = []
+    for benchmark in BENCHMARK_COLS:
+        if benchmark in shown_columns:
+            for c in COLS:
+                if benchmark in c and benchmark != c:
+                    benchmark_columns.append(c)
+    return benchmark_columns
+
 
 # Searching and filtering
 def update_table(
@@ -96,7 +107,8 @@ def update_table(
 ):
     filtered_df = filter_models(hidden_df, type_query, size_query, precision_query)
     filtered_df = filter_queries(query, filtered_df)
-    df = select_columns(filtered_df, columns)
+    benchmark_columns = add_benchmark_columns(columns)
+    df = select_columns(filtered_df, columns + benchmark_columns)
     return df
 
 
@@ -270,18 +282,19 @@ with demo:
             # )
 
             # breakpoint()
-
+            benchmark_columns = add_benchmark_columns(shown_columns.value)
             leaderboard_table = gr.components.Dataframe(
                 value=(
                     leaderboard_df[
                         [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
                         + shown_columns.value
+                        + benchmark_columns
                         + [AutoEvalColumn.dummy.name]
                     ]
                     if leaderboard_df.empty is False
                     else leaderboard_df
                 ),
-                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
+                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value + benchmark_columns,
                 datatype=TYPES,
                 elem_id="leaderboard-table",
                 interactive=False,
@@ -313,7 +326,7 @@ with demo:
     demo.load(load_query, inputs=[], outputs=[search_bar])
 
     for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size]:
-        selector.
+        selector.change(
             update_table,
             [
                 hidden_leaderboard_table_for_search,
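For reference, this is how the new helper behaves in isolation. It is a standalone sketch: the BENCHMARK_COLS and COLS values below are made up for illustration (the real lists come from src/display/utils.py), but the function body matches the one added above.

# Illustrative values only; not the repo's actual column lists.
BENCHMARK_COLS = ["MMLU", "GSM8K"]
COLS = [
    "Model",
    "MMLU",
    "MMLU End-to-end time (s)",
    "MMLU GPU Memory (GB)",
    "GSM8K",
    "GSM8K Decoding throughput (tok/s)",
]

def add_benchmark_columns(shown_columns):
    # For every benchmark the user has toggled on, also collect the
    # metric columns whose names contain that benchmark's name.
    benchmark_columns = []
    for benchmark in BENCHMARK_COLS:
        if benchmark in shown_columns:
            for c in COLS:
                if benchmark in c and benchmark != c:
                    benchmark_columns.append(c)
    return benchmark_columns

print(add_benchmark_columns(["MMLU"]))
# ['MMLU End-to-end time (s)', 'MMLU GPU Memory (GB)']

So toggling a single benchmark button now pulls in all of that benchmark's system-metric columns as well, which is the "button unification" this commit refers to.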
src/display/utils.py
CHANGED
@@ -104,16 +104,16 @@ auto_eval_column_dict.append(["inference_framework", ColumnContent, ColumnConten
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
     # System performance metrics
-    auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} {E2Es}", "number", True)])
-    auto_eval_column_dict.append([f"{task.name}_batch_size", ColumnContent, ColumnContent(f"{task.value.col_name} {BATCH_SIZE}", "number", True)])
-    # auto_eval_column_dict.append([f"{task.name}_precision", ColumnContent, ColumnContent(f"{task.value.col_name} {PRECISION}", "str", True)])
-    auto_eval_column_dict.append([f"{task.name}_gpu_mem", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Mem}", "number", True)])
-    auto_eval_column_dict.append([f"{task.name}_gpu", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Name}", "str", True)])
-    auto_eval_column_dict.append([f"{task.name}_gpu_util", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Util}", "number", True)])
+    auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} {E2Es}", "number", True, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_batch_size", ColumnContent, ColumnContent(f"{task.value.col_name} {BATCH_SIZE}", "number", True, hidden=True)])
+    # auto_eval_column_dict.append([f"{task.name}_precision", ColumnContent, ColumnContent(f"{task.value.col_name} {PRECISION}", "str", True, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_gpu_mem", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Mem}", "number", True, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_gpu", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Name}", "str", True, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_gpu_util", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Util}", "number", True, hidden=True)])
     if task.value.benchmark in MULTIPLE_CHOICEs:
         continue
-    # auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False)])
-    auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True)])
+    # auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True, hidden=True)])
 
 
 # Model information
@@ -242,8 +242,8 @@ class Precision(Enum):
 
 
 # Column selection
-COLS = [c.name for c in fields(AutoEvalColumn)
-TYPES = [c.type for c in fields(AutoEvalColumn)
+COLS = [c.name for c in fields(AutoEvalColumn)]
+TYPES = [c.type for c in fields(AutoEvalColumn)]
 COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
 TYPES_LITE = [c.type for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
 
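The hidden=True flags and the relaxed COLS/TYPES definitions work as a pair: the per-benchmark system metrics no longer appear as individual display buttons, but they stay addressable so update_table can re-attach them whenever their parent benchmark column is shown. A minimal standalone sketch of that split (the Col dataclass and column names are simplified stand-ins, not the repo's actual ColumnContent/AutoEvalColumn):

# Standalone illustration only; field names mirror the ones used above.
from dataclasses import dataclass

@dataclass
class Col:
    name: str
    type: str
    displayed_by_default: bool = True
    hidden: bool = False

all_columns = [
    Col("MMLU", "number"),
    Col("MMLU End-to-end time (s)", "number", hidden=True),  # unified under MMLU
    Col("MMLU GPU Memory (GB)", "number", hidden=True),      # unified under MMLU
]

# Analogue of the new COLS/TYPES: keep every column, hidden or not,
# so add_benchmark_columns() can look the metric columns up by name.
cols = [c.name for c in all_columns]

# Analogue of the selector choices (COLS_LITE-style): hidden metrics
# no longer get their own display buttons.
selectable = [c.name for c in all_columns if c.displayed_by_default and not c.hidden]

print(cols)        # ['MMLU', 'MMLU End-to-end time (s)', 'MMLU GPU Memory (GB)']
print(selectable)  # ['MMLU']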