IlyasMoutawwakil HF staff committed on
Commit
7ecfa5a
•
1 Parent(s): 76b423c
Files changed (2) hide show
  1. src/control_panel.py +32 -32
  2. src/llm_perf.py +5 -5
src/control_panel.py CHANGED
@@ -40,7 +40,7 @@ def create_control_panel(machine: str):
40
  with gr.Row():
41
  with gr.Column(scale=1, variant="panel"):
42
  datatype_checkboxes = gr.CheckboxGroup(
43
- label="DTypes 📥",
44
  choices=["float32", "float16", "bfloat16"],
45
  value=["float32", "float16", "bfloat16"],
46
  info="☑️ Select the load data types",
@@ -49,8 +49,8 @@ def create_control_panel(machine: str):
49
  with gr.Column(scale=1, variant="panel"):
50
  optimization_checkboxes = gr.CheckboxGroup(
51
  label="Attentions 👁️",
52
- choices=["None", "BetterTransformer", "FlashAttentionV2"],
53
- value=["None", "BetterTransformer", "FlashAttentionV2"],
54
  info="☑️ Select the optimization",
55
  elem_id="optimization-checkboxes",
56
  )
@@ -61,21 +61,15 @@ def create_control_panel(machine: str):
61
  "None",
62
  "BnB.4bit",
63
  "BnB.8bit",
 
64
  "GPTQ.4bit",
65
- "GPTQ.4bit+ExllamaV1",
66
- "GPTQ.4bit+ExllamaV2",
67
- "AWQ.4bit+GEMM",
68
- "AWQ.4bit+GEMV",
69
  ],
70
  value=[
71
  "None",
72
  "BnB.4bit",
73
  "BnB.8bit",
 
74
  "GPTQ.4bit",
75
- "GPTQ.4bit+ExllamaV1",
76
- "GPTQ.4bit+ExllamaV2",
77
- "AWQ.4bit+GEMM",
78
- "AWQ.4bit+GEMV",
79
  ],
80
  info="☑️ Select the quantization schemes",
81
  elem_id="quantization-checkboxes",
@@ -100,31 +94,35 @@ def create_control_panel(machine: str):
100
  )
101
 
102
 
103
- def filter_fn(
104
  machine,
105
  # inputs
106
  score,
107
  memory,
108
  backends,
109
- datatypes,
110
- optimizations,
111
  quantizations,
112
  # interactive
113
  columns,
114
  search,
115
  ):
116
- raw_df = get_llm_perf_df(machine=machine)
117
- filtered_df = raw_df[
118
- # raw_df["Model 🤗"].str.contains(model, case=False)
119
- raw_df["Backend 🏭"].isin(backends)
120
- & raw_df["DType 📥"].isin(datatypes)
121
- & raw_df["Attention 👁️"].isin(optimizations)
122
- & raw_df["Quantization 🗜️"].isin(quantizations)
123
- & (raw_df["Open LLM Score (%)"] >= score)
124
- & (raw_df["Allocated Memory (MB)"] <= memory)
 
 
125
  ]
126
- filtered_leaderboard_df = select_fn(machine, columns, search)
127
- filtered_lat_score_mem_fig = get_lat_score_mem_fig(filtered_df)
 
 
128
  # filtered_bt_prefill_fig = get_bt_prefill_fig(filtered_df)
129
  # filtered_bt_decode_fig = get_bt_decode_fig(filtered_df)
130
  # filtered_fa2_prefill_fig = get_fa2_prefill_fig(filtered_df)
@@ -133,8 +131,8 @@ def filter_fn(
133
  # filtered_quant_decode_fig = get_quant_decode_fig(filtered_df)
134
 
135
  return [
136
- filtered_leaderboard_df,
137
- filtered_lat_score_mem_fig,
138
  # filtered_bt_prefill_fig,
139
  # filtered_bt_decode_fig,
140
  # filtered_fa2_prefill_fig,
@@ -170,7 +168,7 @@ def create_control_callback(
170
  # quant_decode_plot,
171
  ):
172
  filter_button.click(
173
- fn=filter_fn,
174
  inputs=[
175
  # fixed
176
  machine_textbox,
@@ -198,8 +196,10 @@ def create_control_callback(
198
  )
199
 
200
 
201
- def select_fn(machine, columns, search):
202
- llm_perf_df = get_llm_perf_df(machine=machine)
 
 
203
  selected_leaderboard_df = get_leaderboard_df(llm_perf_df)
204
  selected_leaderboard_df = selected_leaderboard_df[
205
  selected_leaderboard_df["Model 🤗"].str.contains(search, case=False)
@@ -219,12 +219,12 @@ def create_select_callback(
219
  leaderboard_table,
220
  ):
221
  columns_checkboxes.change(
222
- fn=select_fn,
223
  inputs=[machine_textbox, columns_checkboxes, search_bar],
224
  outputs=[leaderboard_table],
225
  )
226
  search_bar.change(
227
- fn=select_fn,
228
  inputs=[machine_textbox, columns_checkboxes, search_bar],
229
  outputs=[leaderboard_table],
230
  )
 
40
  with gr.Row():
41
  with gr.Column(scale=1, variant="panel"):
42
  datatype_checkboxes = gr.CheckboxGroup(
43
+ label="Precision 📥",
44
  choices=["float32", "float16", "bfloat16"],
45
  value=["float32", "float16", "bfloat16"],
46
  info="☑️ Select the load data types",
 
49
  with gr.Column(scale=1, variant="panel"):
50
  optimization_checkboxes = gr.CheckboxGroup(
51
  label="Attentions 👁️",
52
+ choices=["Eager", "SDPA", "FAv2"],
53
+ value=["Eager", "SDPA", "FAv2"],
54
  info="☑️ Select the optimization",
55
  elem_id="optimization-checkboxes",
56
  )
 
61
  "None",
62
  "BnB.4bit",
63
  "BnB.8bit",
64
+ "AWQ.4bit",
65
  "GPTQ.4bit",
 
 
 
 
66
  ],
67
  value=[
68
  "None",
69
  "BnB.4bit",
70
  "BnB.8bit",
71
+ "AWQ.4bit",
72
  "GPTQ.4bit",
 
 
 
 
73
  ],
74
  info="☑️ Select the quantization schemes",
75
  elem_id="quantization-checkboxes",
 
94
  )
95
 
96
 
97
+ def filter_rows_fn(
98
  machine,
99
  # inputs
100
  score,
101
  memory,
102
  backends,
103
+ precisions,
104
+ attentions,
105
  quantizations,
106
  # interactive
107
  columns,
108
  search,
109
  ):
110
+ llm_perf_df = get_llm_perf_df(machine=machine)
111
+ # print(attentions)
112
+ # print(llm_perf_df["Attention 👁️"].unique())
113
+ filtered_llm_perf_df = llm_perf_df[
114
+ llm_perf_df["Model 🤗"].str.contains(search, case=False)
115
+ & llm_perf_df["Backend 🏭"].isin(backends)
116
+ & llm_perf_df["Precision 📥"].isin(precisions)
117
+ & llm_perf_df["Attention 👁️"].isin(attentions)
118
+ & llm_perf_df["Quantization 🗜️"].isin(quantizations)
119
+ & (llm_perf_df["Open LLM Score (%)"] >= score)
120
+ & (llm_perf_df["Memory (MB)"] <= memory)
121
  ]
122
+ selected_filtered_llm_perf_df = select_columns_fn(
123
+ machine, columns, search, filtered_llm_perf_df
124
+ )
125
+ selected_filtered_lat_score_mem_fig = get_lat_score_mem_fig(filtered_llm_perf_df)
126
  # filtered_bt_prefill_fig = get_bt_prefill_fig(filtered_df)
127
  # filtered_bt_decode_fig = get_bt_decode_fig(filtered_df)
128
  # filtered_fa2_prefill_fig = get_fa2_prefill_fig(filtered_df)
 
131
  # filtered_quant_decode_fig = get_quant_decode_fig(filtered_df)
132
 
133
  return [
134
+ selected_filtered_llm_perf_df,
135
+ selected_filtered_lat_score_mem_fig,
136
  # filtered_bt_prefill_fig,
137
  # filtered_bt_decode_fig,
138
  # filtered_fa2_prefill_fig,
 
168
  # quant_decode_plot,
169
  ):
170
  filter_button.click(
171
+ fn=filter_rows_fn,
172
  inputs=[
173
  # fixed
174
  machine_textbox,
 
196
  )
197
 
198
 
199
+ def select_columns_fn(machine, columns, search, llm_perf_df=None):
200
+ if llm_perf_df is None:
201
+ llm_perf_df = get_llm_perf_df(machine=machine)
202
+
203
  selected_leaderboard_df = get_leaderboard_df(llm_perf_df)
204
  selected_leaderboard_df = selected_leaderboard_df[
205
  selected_leaderboard_df["Model 🤗"].str.contains(search, case=False)
 
219
  leaderboard_table,
220
  ):
221
  columns_checkboxes.change(
222
+ fn=select_columns_fn,
223
  inputs=[machine_textbox, columns_checkboxes, search_bar],
224
  outputs=[leaderboard_table],
225
  )
226
  search_bar.change(
227
+ fn=select_columns_fn,
228
  inputs=[machine_textbox, columns_checkboxes, search_bar],
229
  outputs=[leaderboard_table],
230
  )
src/llm_perf.py CHANGED
@@ -36,19 +36,19 @@ def get_raw_llm_perf_df(machine: str = "1xA10"):
36
  try:
37
  dfs.append(
38
  pd.read_csv(
39
- f"hf://datasets/optimum-benchmark/llm-perf-leaderboard/llm-perf-leaderboard-{subset}-{machine}.csv"
40
  )
41
  )
42
  except Exception:
43
  print(f"Subset {subset} for machine {machine} not found")
44
 
45
- llm_perf_df = pd.concat(dfs)
46
- open_llm_df = pd.read_csv(
47
- "hf://datasets/optimum-benchmark/open-llm-leaderboard/open-llm-leaderboard.csv"
48
  )
49
 
50
  llm_perf_df = pd.merge(
51
- open_llm_df, llm_perf_df, left_on="Model", right_on="config.backend.model"
52
  )
53
 
54
  return llm_perf_df
 
36
  try:
37
  dfs.append(
38
  pd.read_csv(
39
+ f"hf://datasets/optimum-benchmark/llm-perf-leaderboard/perf-df-{subset}-{machine}.csv"
40
  )
41
  )
42
  except Exception:
43
  print(f"Subset {subset} for machine {machine} not found")
44
 
45
+ perf_df = pd.concat(dfs)
46
+ llm_df = pd.read_csv(
47
+ "hf://datasets/optimum-benchmark/llm-perf-leaderboard/llm-df.csv"
48
  )
49
 
50
  llm_perf_df = pd.merge(
51
+ llm_df, perf_df, left_on="Model", right_on="config.backend.model"
52
  )
53
 
54
  return llm_perf_df