Spaces:
Running
Running
Commit
•
e37cee2
1
Parent(s):
e47d0b2
add support for more backend
Browse files- app.py +36 -14
- hardware.yml → hardware.yaml +6 -4
- requirements.txt +3 -2
- src/content.py +6 -6
- src/dependency.py +3 -0
- src/hardware.py +9 -9
- src/llm_perf.py +33 -14
- src/panel.py +67 -22
app.py
CHANGED
@@ -14,10 +14,13 @@ from src.panel import (
|
|
14 |
create_select_callback,
|
15 |
)
|
16 |
|
17 |
-
configs = load_hardware_configs("hardware.
|
18 |
|
19 |
|
20 |
-
demo = gr.Blocks(
|
|
|
|
|
|
|
21 |
with demo:
|
22 |
gr.HTML(LOGO, elem_classes="logo")
|
23 |
gr.HTML(TITLE, elem_classes="title")
|
@@ -29,13 +32,13 @@ with demo:
|
|
29 |
if config.detail:
|
30 |
gr.Markdown(config.detail, elem_classes="descriptive-text")
|
31 |
|
32 |
-
|
33 |
-
|
34 |
# ####################### CONTROL PANEL #######################
|
35 |
(
|
36 |
filter_button,
|
37 |
-
|
38 |
-
|
|
|
|
|
39 |
score_slider,
|
40 |
memory_slider,
|
41 |
backend_checkboxes,
|
@@ -43,18 +46,33 @@ with demo:
|
|
43 |
optimization_checkboxes,
|
44 |
quantization_checkboxes,
|
45 |
kernels_checkboxes,
|
46 |
-
) = create_control_panel(
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
####################### HARDWARE SUBTABS #######################
|
48 |
with gr.Tabs(elem_classes="subtabs"):
|
49 |
-
open_llm_perf_df = get_llm_perf_df(
|
|
|
|
|
|
|
|
|
|
|
50 |
####################### LEADERBOARD TAB #######################
|
51 |
with gr.TabItem("Leaderboard 🏅", id=0):
|
52 |
search_bar, columns_checkboxes, leaderboard_table = (
|
53 |
create_leaderboard_table(open_llm_perf_df)
|
54 |
)
|
55 |
-
if
|
|
|
|
|
56 |
with gr.TabItem("Find Your Best Model 🧭", id=1):
|
57 |
-
lat_score_mem_plot = create_lat_score_mem_plot(
|
|
|
|
|
58 |
###################### ATTENTIONS SPEEDUP TAB #######################
|
59 |
# with gr.TabItem("Attention π", id=2):
|
60 |
# attn_prefill_plot, attn_decode_plot = create_attn_plots(
|
@@ -70,8 +88,10 @@ with demo:
|
|
70 |
create_control_callback(
|
71 |
filter_button,
|
72 |
# inputs
|
73 |
-
|
74 |
-
|
|
|
|
|
75 |
score_slider,
|
76 |
memory_slider,
|
77 |
backend_checkboxes,
|
@@ -93,8 +113,10 @@ with demo:
|
|
93 |
|
94 |
create_select_callback(
|
95 |
# inputs
|
96 |
-
|
97 |
-
|
|
|
|
|
98 |
# interactive
|
99 |
columns_checkboxes,
|
100 |
search_bar,
|
|
|
14 |
create_select_callback,
|
15 |
)
|
16 |
|
17 |
+
configs = load_hardware_configs("hardware.yaml")
|
18 |
|
19 |
|
20 |
+
demo = gr.Blocks(
|
21 |
+
css=custom_css,
|
22 |
+
theme=gr.themes.Default(primary_hue="indigo", secondary_hue="indigo"),
|
23 |
+
)
|
24 |
with demo:
|
25 |
gr.HTML(LOGO, elem_classes="logo")
|
26 |
gr.HTML(TITLE, elem_classes="title")
|
|
|
32 |
if config.detail:
|
33 |
gr.Markdown(config.detail, elem_classes="descriptive-text")
|
34 |
|
|
|
|
|
35 |
# ####################### CONTROL PANEL #######################
|
36 |
(
|
37 |
filter_button,
|
38 |
+
machine_value,
|
39 |
+
subsets_value,
|
40 |
+
backends_value,
|
41 |
+
hardware_type_value,
|
42 |
score_slider,
|
43 |
memory_slider,
|
44 |
backend_checkboxes,
|
|
|
46 |
optimization_checkboxes,
|
47 |
quantization_checkboxes,
|
48 |
kernels_checkboxes,
|
49 |
+
) = create_control_panel(
|
50 |
+
machine=config.machine,
|
51 |
+
subsets=config.subsets,
|
52 |
+
backends=config.backends,
|
53 |
+
hardware_type=config.hardware_type,
|
54 |
+
hardware_provider=config.hardware_provider,
|
55 |
+
)
|
56 |
####################### HARDWARE SUBTABS #######################
|
57 |
with gr.Tabs(elem_classes="subtabs"):
|
58 |
+
open_llm_perf_df = get_llm_perf_df(
|
59 |
+
machine=config.machine,
|
60 |
+
subsets=config.subsets,
|
61 |
+
backends=config.backends,
|
62 |
+
hardware_type=config.hardware_type,
|
63 |
+
)
|
64 |
####################### LEADERBOARD TAB #######################
|
65 |
with gr.TabItem("Leaderboard 🏅", id=0):
|
66 |
search_bar, columns_checkboxes, leaderboard_table = (
|
67 |
create_leaderboard_table(open_llm_perf_df)
|
68 |
)
|
69 |
+
if (
|
70 |
+
config.hardware_provider != "intel"
|
71 |
+
): # TODO intel CPU does not measure the memory requirements correctly, so disable the graph feature until we fix the underlying issue
|
72 |
with gr.TabItem("Find Your Best Model 🧭", id=1):
|
73 |
+
lat_score_mem_plot = create_lat_score_mem_plot(
|
74 |
+
open_llm_perf_df
|
75 |
+
)
|
76 |
###################### ATTENTIONS SPEEDUP TAB #######################
|
77 |
# with gr.TabItem("Attention π", id=2):
|
78 |
# attn_prefill_plot, attn_decode_plot = create_attn_plots(
|
|
|
88 |
create_control_callback(
|
89 |
filter_button,
|
90 |
# inputs
|
91 |
+
machine_value,
|
92 |
+
subsets_value,
|
93 |
+
backends_value,
|
94 |
+
hardware_type_value,
|
95 |
score_slider,
|
96 |
memory_slider,
|
97 |
backend_checkboxes,
|
|
|
113 |
|
114 |
create_select_callback(
|
115 |
# inputs
|
116 |
+
machine_value,
|
117 |
+
subsets_value,
|
118 |
+
backends_value,
|
119 |
+
hardware_type_value,
|
120 |
# interactive
|
121 |
columns_checkboxes,
|
122 |
search_bar,
|
hardware.yml → hardware.yaml
RENAMED
@@ -1,7 +1,7 @@
|
|
1 |
- machine: 1xA10
|
2 |
description: A10-24GB-150W 🖥️
|
3 |
hardware_provider: nvidia
|
4 |
-
hardware_type:
|
5 |
subsets:
|
6 |
- unquantized
|
7 |
- awq
|
@@ -13,7 +13,7 @@
|
|
13 |
- machine: 1xA100
|
14 |
description: A100-80GB-275W 🖥️
|
15 |
hardware_provider: nvidia
|
16 |
-
hardware_type:
|
17 |
subsets:
|
18 |
- unquantized
|
19 |
- awq
|
@@ -25,7 +25,7 @@
|
|
25 |
- machine: 1xT4
|
26 |
description: T4-16GB-70W 🖥️
|
27 |
hardware_provider: nvidia
|
28 |
-
hardware_type:
|
29 |
subsets:
|
30 |
- unquantized
|
31 |
- awq
|
@@ -43,4 +43,6 @@
|
|
43 |
subsets:
|
44 |
- unquantized
|
45 |
backends:
|
46 |
-
- pytorch
|
|
|
|
|
|
1 |
- machine: 1xA10
|
2 |
description: A10-24GB-150W 🖥️
|
3 |
hardware_provider: nvidia
|
4 |
+
hardware_type: cuda
|
5 |
subsets:
|
6 |
- unquantized
|
7 |
- awq
|
|
|
13 |
- machine: 1xA100
|
14 |
description: A100-80GB-275W 🖥️
|
15 |
hardware_provider: nvidia
|
16 |
+
hardware_type: cuda
|
17 |
subsets:
|
18 |
- unquantized
|
19 |
- awq
|
|
|
25 |
- machine: 1xT4
|
26 |
description: T4-16GB-70W 🖥️
|
27 |
hardware_provider: nvidia
|
28 |
+
hardware_type: cuda
|
29 |
subsets:
|
30 |
- unquantized
|
31 |
- awq
|
|
|
43 |
subsets:
|
44 |
- unquantized
|
45 |
backends:
|
46 |
+
- pytorch
|
47 |
+
- openvino
|
48 |
+
- onnxruntime
|
requirements.txt
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
huggingface_hub
|
2 |
transformers
|
3 |
-
gradio
|
4 |
plotly
|
5 |
-
pandas
|
|
|
|
1 |
huggingface_hub
|
2 |
transformers
|
3 |
+
gradio>=5.0.0
|
4 |
plotly
|
5 |
+
pandas
|
6 |
+
ruff
|
src/content.py
CHANGED
@@ -5,18 +5,18 @@ TITLE = """<h1 align="center" id="space-title">🤗 LLM-Perf Leaderboard 🏋️
|
|
5 |
ABOUT = """
|
6 |
## π About
|
7 |
The 🤗 LLM-Perf Leaderboard 🏋️ is a leaderboard at the intersection of quality and performance.
|
8 |
-
Its aim is to benchmark the performance (latency, throughput, memory & energy)
|
9 |
-
of Large Language Models (LLMs) with different hardwares, backends and optimizations
|
10 |
using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark).
|
11 |
|
12 |
-
Anyone from the community can request a new base model or hardware/backend/optimization
|
13 |
configuration for automated benchmarking:
|
14 |
|
15 |
-
- Model evaluation requests should be made in the
|
16 |
[🤗 Open LLM Leaderboard 🏅](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) ;
|
17 |
we scrape the [list of canonical base models](https://github.com/huggingface/optimum-benchmark/blob/main/llm_perf/utils.py) from there.
|
18 |
-
- Hardware/Backend/Optimization configuration requests should be made in the
|
19 |
-
[🤗 LLM-Perf Leaderboard 🏋️](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) or
|
20 |
[Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) repository (where the code is hosted).
|
21 |
|
22 |
## βοΈ Details
|
|
|
5 |
ABOUT = """
|
6 |
## π About
|
7 |
The 🤗 LLM-Perf Leaderboard 🏋️ is a leaderboard at the intersection of quality and performance.
|
8 |
+
Its aim is to benchmark the performance (latency, throughput, memory & energy)
|
9 |
+
of Large Language Models (LLMs) with different hardwares, backends and optimizations
|
10 |
using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark).
|
11 |
|
12 |
+
Anyone from the community can request a new base model or hardware/backend/optimization
|
13 |
configuration for automated benchmarking:
|
14 |
|
15 |
+
- Model evaluation requests should be made in the
|
16 |
[🤗 Open LLM Leaderboard 🏅](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) ;
|
17 |
we scrape the [list of canonical base models](https://github.com/huggingface/optimum-benchmark/blob/main/llm_perf/utils.py) from there.
|
18 |
+
- Hardware/Backend/Optimization configuration requests should be made in the
|
19 |
+
[🤗 LLM-Perf Leaderboard 🏋️](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) or
|
20 |
[Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) repository (where the code is hosted).
|
21 |
|
22 |
## βοΈ Details
|
src/dependency.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
|
src/hardware.py
CHANGED
@@ -1,19 +1,19 @@
|
|
1 |
-
from typing import Any, Dict, List
|
2 |
|
3 |
import yaml
|
4 |
|
5 |
|
6 |
class HardwareConfig:
|
7 |
def __init__(self, data: Dict[str, Any]):
|
8 |
-
self.machine = data["machine"]
|
9 |
-
self.description = data["description"]
|
10 |
-
self.hardware_provider = data["hardware_provider"]
|
11 |
-
self.hardware_type = data["hardware_type"]
|
12 |
-
self.subsets = data["subsets"]
|
13 |
-
self.backends = data["backends"]
|
14 |
-
self.detail = data.get("detail", None)
|
15 |
|
16 |
-
def __repr__(self):
|
17 |
return (
|
18 |
f"HardwareConfig(machine='{self.machine}', description='{self.description}', "
|
19 |
f"hardware_provider={self.hardware_provider}, hardware_type={self.hardware_type}, subsets={self.subsets}, backends={self.backends})"
|
|
|
1 |
+
from typing import Any, Dict, List, Optional
|
2 |
|
3 |
import yaml
|
4 |
|
5 |
|
6 |
class HardwareConfig:
|
7 |
def __init__(self, data: Dict[str, Any]):
|
8 |
+
self.machine: str = data["machine"]
|
9 |
+
self.description: str = data["description"]
|
10 |
+
self.hardware_provider: str = data["hardware_provider"]
|
11 |
+
self.hardware_type: str = data["hardware_type"]
|
12 |
+
self.subsets: List[str] = data["subsets"]
|
13 |
+
self.backends: List[str] = data["backends"]
|
14 |
+
self.detail: Optional[str] = data.get("detail", None)
|
15 |
|
16 |
+
def __repr__(self) -> str:
|
17 |
return (
|
18 |
f"HardwareConfig(machine='{self.machine}', description='{self.description}', "
|
19 |
f"hardware_provider={self.hardware_provider}, hardware_type={self.hardware_type}, subsets={self.subsets}, backends={self.backends})"
|
src/llm_perf.py
CHANGED
@@ -3,8 +3,6 @@ from typing import List
|
|
3 |
|
4 |
import pandas as pd
|
5 |
|
6 |
-
from src.hardware import HardwareConfig
|
7 |
-
|
8 |
from .utils import process_kernels, process_quantizations
|
9 |
|
10 |
DATASET_DIRECTORY = "dataset"
|
@@ -34,17 +32,31 @@ SORTING_COLUMNS = ["Open LLM Score (%)", "Decode (tokens/s)", "Prefill (s)"]
|
|
34 |
SORTING_ASCENDING = [False, True, False]
|
35 |
|
36 |
|
37 |
-
def get_raw_llm_perf_df(
|
|
|
|
|
38 |
dfs = []
|
39 |
for subset in subsets:
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
44 |
)
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
perf_df = pd.concat(dfs)
|
50 |
llm_df = pd.read_csv(
|
@@ -112,15 +124,22 @@ def processed_llm_perf_df(llm_perf_df):
|
|
112 |
return llm_perf_df
|
113 |
|
114 |
|
115 |
-
def get_llm_perf_df(
|
|
|
|
|
116 |
if not os.path.exists(DATASET_DIRECTORY):
|
117 |
os.makedirs(DATASET_DIRECTORY)
|
118 |
|
119 |
if os.path.exists(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv"):
|
120 |
-
llm_perf_df = pd.read_csv(
|
|
|
|
|
121 |
else:
|
122 |
-
|
|
|
123 |
llm_perf_df = processed_llm_perf_df(llm_perf_df)
|
124 |
-
llm_perf_df.to_csv(
|
|
|
|
|
125 |
|
126 |
return llm_perf_df
|
|
|
3 |
|
4 |
import pandas as pd
|
5 |
|
|
|
|
|
6 |
from .utils import process_kernels, process_quantizations
|
7 |
|
8 |
DATASET_DIRECTORY = "dataset"
|
|
|
32 |
SORTING_ASCENDING = [False, True, False]
|
33 |
|
34 |
|
35 |
+
def get_raw_llm_perf_df(
|
36 |
+
machine: str, subsets: List[str], backends: List[str], hardware_type: str
|
37 |
+
):
|
38 |
dfs = []
|
39 |
for subset in subsets:
|
40 |
+
for backend in backends:
|
41 |
+
try:
|
42 |
+
dfs.append(
|
43 |
+
pd.read_csv(
|
44 |
+
f"hf://datasets/optimum-benchmark/llm-perf-leaderboard/perf-df-{backend}-{hardware_type}-{subset}-{machine}.csv"
|
45 |
+
)
|
46 |
)
|
47 |
+
except Exception:
|
48 |
+
print("Dataset not found for:")
|
49 |
+
print(f" • Backend: {backend}")
|
50 |
+
print(f" • Subset: {subset}")
|
51 |
+
print(f" • Machine: {machine}")
|
52 |
+
print(f" • Hardware Type: {hardware_type}")
|
53 |
+
url = f"https://huggingface.co/datasets/optimum-benchmark/llm-perf-leaderboard/blob/main/perf-df-{backend}-{hardware_type}-{subset}-{machine}.csv"
|
54 |
+
print(f" • URL: {url}")
|
55 |
+
|
56 |
+
if len(dfs) == 0:
|
57 |
+
raise ValueError(
|
58 |
+
f"No datasets found for machine {machine}, check your hardware.yml config file or your datatset on huggingface"
|
59 |
+
)
|
60 |
|
61 |
perf_df = pd.concat(dfs)
|
62 |
llm_df = pd.read_csv(
|
|
|
124 |
return llm_perf_df
|
125 |
|
126 |
|
127 |
+
def get_llm_perf_df(
|
128 |
+
machine: str, subsets: List[str], backends: List[str], hardware_type: str
|
129 |
+
):
|
130 |
if not os.path.exists(DATASET_DIRECTORY):
|
131 |
os.makedirs(DATASET_DIRECTORY)
|
132 |
|
133 |
if os.path.exists(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv"):
|
134 |
+
llm_perf_df = pd.read_csv(
|
135 |
+
f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv"
|
136 |
+
)
|
137 |
else:
|
138 |
+
print(f"Dataset machine {machine} not found, downloading...")
|
139 |
+
llm_perf_df = get_raw_llm_perf_df(machine, subsets, backends, hardware_type)
|
140 |
llm_perf_df = processed_llm_perf_df(llm_perf_df)
|
141 |
+
llm_perf_df.to_csv(
|
142 |
+
f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv", index=False
|
143 |
+
)
|
144 |
|
145 |
return llm_perf_df
|
src/panel.py
CHANGED
@@ -10,17 +10,30 @@ from src.llm_perf import get_llm_perf_df
|
|
10 |
from src.map import get_lat_score_mem_fig
|
11 |
|
12 |
|
13 |
-
def create_control_panel(
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
# controls
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
18 |
|
19 |
if hardware_provider == "nvidia":
|
20 |
backends = ["pytorch"]
|
21 |
attention_implementations = ["Eager", "SDPA", "FAv2"]
|
22 |
quantizations = ["Unquantized", "BnB.4bit", "BnB.8bit", "AWQ.4bit", "GPTQ.4bit"]
|
23 |
-
kernels = [
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
elif hardware_provider == "intel":
|
25 |
backends = ["pytorch", "onnxruntime", "openvino"]
|
26 |
attention_implementations = ["Eager"]
|
@@ -29,7 +42,6 @@ def create_control_panel(machine: str, subsets: List[str], hardware_provider: st
|
|
29 |
else:
|
30 |
raise ValueError(f"Unknown hardware provider: {hardware_provider}")
|
31 |
|
32 |
-
|
33 |
with gr.Accordion("Control Panel ποΈ", open=False, elem_id="control-panel"):
|
34 |
with gr.Row():
|
35 |
with gr.Column(scale=2, variant="panel"):
|
@@ -101,8 +113,10 @@ def create_control_panel(machine: str, subsets: List[str], hardware_provider: st
|
|
101 |
|
102 |
return (
|
103 |
filter_button,
|
104 |
-
|
105 |
-
|
|
|
|
|
106 |
score_slider,
|
107 |
memory_slider,
|
108 |
backend_checkboxes,
|
@@ -116,10 +130,12 @@ def create_control_panel(machine: str, subsets: List[str], hardware_provider: st
|
|
116 |
def filter_rows_fn(
|
117 |
machine,
|
118 |
subsets,
|
|
|
|
|
119 |
# inputs
|
120 |
score,
|
121 |
memory,
|
122 |
-
|
123 |
precisions,
|
124 |
attentions,
|
125 |
quantizations,
|
@@ -128,12 +144,14 @@ def filter_rows_fn(
|
|
128 |
columns,
|
129 |
search,
|
130 |
):
|
131 |
-
llm_perf_df = get_llm_perf_df(
|
|
|
|
|
132 |
# print(attentions)
|
133 |
# print(llm_perf_df["Attention ποΈ"].unique())
|
134 |
filtered_llm_perf_df = llm_perf_df[
|
135 |
llm_perf_df["Model π€"].str.contains(search, case=False)
|
136 |
-
& llm_perf_df["Backend π"].isin(
|
137 |
& llm_perf_df["Precision π₯"].isin(precisions)
|
138 |
& llm_perf_df["Attention ποΈ"].isin(attentions)
|
139 |
& llm_perf_df["Quantization ποΈ"].isin(quantizations)
|
@@ -142,7 +160,7 @@ def filter_rows_fn(
|
|
142 |
& (llm_perf_df["Memory (MB)"] <= memory)
|
143 |
]
|
144 |
selected_filtered_llm_perf_df = select_columns_fn(
|
145 |
-
machine, subsets, columns, search, filtered_llm_perf_df
|
146 |
)
|
147 |
selected_filtered_lat_score_mem_fig = get_lat_score_mem_fig(filtered_llm_perf_df)
|
148 |
# filtered_bt_prefill_fig = get_bt_prefill_fig(filtered_df)
|
@@ -168,8 +186,10 @@ def create_control_callback(
|
|
168 |
# button
|
169 |
filter_button,
|
170 |
# fixed
|
171 |
-
|
172 |
-
|
|
|
|
|
173 |
# inputs
|
174 |
score_slider,
|
175 |
memory_slider,
|
@@ -195,8 +215,10 @@ def create_control_callback(
|
|
195 |
fn=filter_rows_fn,
|
196 |
inputs=[
|
197 |
# fixed
|
198 |
-
|
199 |
-
|
|
|
|
|
200 |
# inputs
|
201 |
score_slider,
|
202 |
memory_slider,
|
@@ -222,9 +244,16 @@ def create_control_callback(
|
|
222 |
)
|
223 |
|
224 |
|
225 |
-
def select_columns_fn(
|
|
|
|
|
226 |
if llm_perf_df is None:
|
227 |
-
llm_perf_df = get_llm_perf_df(
|
|
|
|
|
|
|
|
|
|
|
228 |
|
229 |
selected_leaderboard_df = get_leaderboard_df(llm_perf_df)
|
230 |
selected_leaderboard_df = selected_leaderboard_df[
|
@@ -237,8 +266,10 @@ def select_columns_fn(machine, subsets, columns, search, llm_perf_df=None):
|
|
237 |
|
238 |
def create_select_callback(
|
239 |
# fixed
|
240 |
-
|
241 |
-
|
|
|
|
|
242 |
# interactive
|
243 |
columns_checkboxes,
|
244 |
search_bar,
|
@@ -247,11 +278,25 @@ def create_select_callback(
|
|
247 |
):
|
248 |
columns_checkboxes.change(
|
249 |
fn=select_columns_fn,
|
250 |
-
inputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
outputs=[leaderboard_table],
|
252 |
)
|
253 |
search_bar.change(
|
254 |
fn=select_columns_fn,
|
255 |
-
inputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
256 |
outputs=[leaderboard_table],
|
257 |
)
|
|
|
10 |
from src.map import get_lat_score_mem_fig
|
11 |
|
12 |
|
13 |
+
def create_control_panel(
|
14 |
+
machine: str,
|
15 |
+
subsets: List[str],
|
16 |
+
backends: List[str],
|
17 |
+
hardware_provider: str,
|
18 |
+
hardware_type: str,
|
19 |
+
):
|
20 |
# controls
|
21 |
+
machine_value = gr.State(value=machine)
|
22 |
+
subsets_value = gr.State(value=subsets)
|
23 |
+
backends_value = gr.State(value=backends)
|
24 |
+
hardware_type_value = gr.State(value=hardware_type)
|
25 |
|
26 |
if hardware_provider == "nvidia":
|
27 |
backends = ["pytorch"]
|
28 |
attention_implementations = ["Eager", "SDPA", "FAv2"]
|
29 |
quantizations = ["Unquantized", "BnB.4bit", "BnB.8bit", "AWQ.4bit", "GPTQ.4bit"]
|
30 |
+
kernels = [
|
31 |
+
"No Kernel",
|
32 |
+
"GPTQ.ExllamaV1",
|
33 |
+
"GPTQ.ExllamaV2",
|
34 |
+
"AWQ.GEMM",
|
35 |
+
"AWQ.GEMV",
|
36 |
+
]
|
37 |
elif hardware_provider == "intel":
|
38 |
backends = ["pytorch", "onnxruntime", "openvino"]
|
39 |
attention_implementations = ["Eager"]
|
|
|
42 |
else:
|
43 |
raise ValueError(f"Unknown hardware provider: {hardware_provider}")
|
44 |
|
|
|
45 |
with gr.Accordion("Control Panel ποΈ", open=False, elem_id="control-panel"):
|
46 |
with gr.Row():
|
47 |
with gr.Column(scale=2, variant="panel"):
|
|
|
113 |
|
114 |
return (
|
115 |
filter_button,
|
116 |
+
machine_value,
|
117 |
+
backends_value,
|
118 |
+
hardware_type_value,
|
119 |
+
subsets_value,
|
120 |
score_slider,
|
121 |
memory_slider,
|
122 |
backend_checkboxes,
|
|
|
130 |
def filter_rows_fn(
|
131 |
machine,
|
132 |
subsets,
|
133 |
+
backends,
|
134 |
+
hardware_type,
|
135 |
# inputs
|
136 |
score,
|
137 |
memory,
|
138 |
+
backend_checkboxes,
|
139 |
precisions,
|
140 |
attentions,
|
141 |
quantizations,
|
|
|
144 |
columns,
|
145 |
search,
|
146 |
):
|
147 |
+
llm_perf_df = get_llm_perf_df(
|
148 |
+
machine=machine, subsets=subsets, backends=backends, hardware_type=hardware_type
|
149 |
+
)
|
150 |
# print(attentions)
|
151 |
# print(llm_perf_df["Attention ποΈ"].unique())
|
152 |
filtered_llm_perf_df = llm_perf_df[
|
153 |
llm_perf_df["Model π€"].str.contains(search, case=False)
|
154 |
+
& llm_perf_df["Backend π"].isin(backend_checkboxes)
|
155 |
& llm_perf_df["Precision π₯"].isin(precisions)
|
156 |
& llm_perf_df["Attention ποΈ"].isin(attentions)
|
157 |
& llm_perf_df["Quantization ποΈ"].isin(quantizations)
|
|
|
160 |
& (llm_perf_df["Memory (MB)"] <= memory)
|
161 |
]
|
162 |
selected_filtered_llm_perf_df = select_columns_fn(
|
163 |
+
machine, subsets, backends, hardware_type, columns, search, filtered_llm_perf_df
|
164 |
)
|
165 |
selected_filtered_lat_score_mem_fig = get_lat_score_mem_fig(filtered_llm_perf_df)
|
166 |
# filtered_bt_prefill_fig = get_bt_prefill_fig(filtered_df)
|
|
|
186 |
# button
|
187 |
filter_button,
|
188 |
# fixed
|
189 |
+
machine_value,
|
190 |
+
subsets_value,
|
191 |
+
backends_value,
|
192 |
+
hardware_type_value,
|
193 |
# inputs
|
194 |
score_slider,
|
195 |
memory_slider,
|
|
|
215 |
fn=filter_rows_fn,
|
216 |
inputs=[
|
217 |
# fixed
|
218 |
+
machine_value,
|
219 |
+
subsets_value,
|
220 |
+
backends_value,
|
221 |
+
hardware_type_value,
|
222 |
# inputs
|
223 |
score_slider,
|
224 |
memory_slider,
|
|
|
244 |
)
|
245 |
|
246 |
|
247 |
+
def select_columns_fn(
|
248 |
+
machine, subsets, backends, hardware_type, columns, search, llm_perf_df=None
|
249 |
+
):
|
250 |
if llm_perf_df is None:
|
251 |
+
llm_perf_df = get_llm_perf_df(
|
252 |
+
machine=machine,
|
253 |
+
subsets=subsets,
|
254 |
+
backends=backends,
|
255 |
+
hardware_type=hardware_type,
|
256 |
+
)
|
257 |
|
258 |
selected_leaderboard_df = get_leaderboard_df(llm_perf_df)
|
259 |
selected_leaderboard_df = selected_leaderboard_df[
|
|
|
266 |
|
267 |
def create_select_callback(
|
268 |
# fixed
|
269 |
+
machine_value,
|
270 |
+
subsets_value,
|
271 |
+
backends_value,
|
272 |
+
hardware_type_value,
|
273 |
# interactive
|
274 |
columns_checkboxes,
|
275 |
search_bar,
|
|
|
278 |
):
|
279 |
columns_checkboxes.change(
|
280 |
fn=select_columns_fn,
|
281 |
+
inputs=[
|
282 |
+
machine_value,
|
283 |
+
subsets_value,
|
284 |
+
backends_value,
|
285 |
+
hardware_type_value,
|
286 |
+
columns_checkboxes,
|
287 |
+
search_bar,
|
288 |
+
],
|
289 |
outputs=[leaderboard_table],
|
290 |
)
|
291 |
search_bar.change(
|
292 |
fn=select_columns_fn,
|
293 |
+
inputs=[
|
294 |
+
machine_value,
|
295 |
+
subsets_value,
|
296 |
+
backends_value,
|
297 |
+
hardware_type_value,
|
298 |
+
columns_checkboxes,
|
299 |
+
search_bar,
|
300 |
+
],
|
301 |
outputs=[leaderboard_table],
|
302 |
)
|