t0-0 committed on
Commit
016c2e7
•
1 Parent(s): 61a2e0b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +215 -37
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import gradio as gr
2
- from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
3
  import pandas as pd
4
  from apscheduler.schedulers.background import BackgroundScheduler
5
  from huggingface_hub import snapshot_download
@@ -59,44 +58,92 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS,
59
  failed_eval_queue_df,
60
  ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
61
 
62
- def init_leaderboard(dataframe):
63
- if dataframe is None or dataframe.empty:
64
- raise ValueError("Leaderboard DataFrame is empty or None.")
65
- return Leaderboard(
66
- value=dataframe,
67
- datatype=[c.type for c in fields(AutoEvalColumn)],
68
- select_columns=SelectColumns(
69
- default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
70
- cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
71
- label="Select Columns to Display:",
72
- ),
73
- search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
74
- hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
75
- filter_columns=[
76
- ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
77
- ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
78
- ColumnFilter(
79
- AutoEvalColumn.params.name,
80
- type="slider",
81
- min=0.01,
82
- max=150,
83
- label="Select the number of parameters (B)",
84
- ),
85
- ColumnFilter(
86
- AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
87
- ),
88
- ColumnFilter(
89
- AutoEvalColumn.num_few_shots.name, type="checkboxgroup", label="Num few shots"
90
- ),
91
- ColumnFilter(
92
- AutoEvalColumn.add_special_tokens.name, type="checkboxgroup", label="Add Special Tokens"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  )
94
- ],
95
- bool_checkboxgroup_label="Hide models",
96
- interactive=False,
97
- )
98
 
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  demo = gr.Blocks(css=custom_css)
101
  with demo:
102
  gr.HTML(TITLE)
@@ -104,7 +151,138 @@ with demo:
104
 
105
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
106
  with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
107
- leaderboard = init_leaderboard(LEADERBOARD_DF)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
110
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
1
  import gradio as gr
 
2
  import pandas as pd
3
  from apscheduler.schedulers.background import BackgroundScheduler
4
  from huggingface_hub import snapshot_download
 
58
  failed_eval_queue_df,
59
  ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
60
 
61
+ # Searching and filtering
62
def update_table(
    hidden_df: pd.DataFrame,
    columns: list,
    type_query: list,
    precision_query: list,
    size_query: list,
    show_deleted: bool,
    show_merges: bool,
    show_flagged: bool,
    query: str,
) -> pd.DataFrame:
    """Recompute the visible leaderboard from the full hidden table.

    Runs three steps in order: ``filter_models`` (type / size / precision /
    visibility toggles), ``filter_queries`` (free-text search), and
    ``select_columns`` (keep only the requested columns).

    Args:
        hidden_df: Table to filter; it is not modified in place.
        columns: Names of the optional columns to display.
        type_query: Selected model-type labels.
        precision_query: Selected precision names. This is a list, because
            ``filter_models`` concatenates it with another list.
        size_query: Selected size-bucket keys.
        show_deleted: Forwarded to ``filter_models``.
        show_merges: Forwarded to ``filter_models``.
        show_flagged: Forwarded to ``filter_models``.
        query: Raw search-bar text, forwarded to ``filter_queries``.

    Returns:
        The filtered table restricted to the selected columns.
    """
    filtered_df = filter_models(
        hidden_df,
        type_query,
        size_query,
        precision_query,
        show_deleted,
        show_merges,
        show_flagged,
    )
    filtered_df = filter_queries(query, filtered_df)
    return select_columns(filtered_df, columns)
77
+
78
+
79
def load_query(request: gr.Request):
    """Read the optional ``query`` URL parameter of the incoming request.

    Returns the same string twice (empty when the parameter is missing or
    blank): one copy for the search bar and one for a hidden component that
    triggers a reload only if its value has changed.
    """
    initial_query = request.query_params.get("query")
    if not initial_query:
        initial_query = ""
    return initial_query, initial_query
82
+
83
+
84
def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Return the rows of ``df`` whose dummy column contains ``query``.

    Matching is case-insensitive. ``query`` is first tried as a regular
    expression, as before; if it is not a valid pattern (for example ``c++``
    or an unbalanced ``(`` typed by a user) it is matched as literal text
    instead of raising. Rows with a missing value in the searched column
    never match.

    Args:
        df: Table to search; must contain the ``AutoEvalColumn.dummy`` column.
        query: A single search term.

    Returns:
        The matching subset of ``df``.
    """
    import re  # local import: only needed to recognise invalid patterns

    names = df[AutoEvalColumn.dummy.name]
    try:
        # na=False keeps the mask strictly boolean when names are missing.
        mask = names.str.contains(query, case=False, na=False)
    except re.error:
        # The text is not a valid regex: fall back to a plain substring match.
        mask = names.str.contains(query, case=False, na=False, regex=False)
    return df[mask]
86
+
87
+
88
def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
    """Project ``df`` onto the pinned columns plus the requested ones.

    The model-type symbol and model columns always come first and the dummy
    column always comes last. In between are the entries of ``COLS`` that
    exist in ``df`` and were requested in ``columns``.
    """
    pinned = [
        AutoEvalColumn.model_type_symbol.name,
        AutoEvalColumn.model.name,
    ]
    # Walk COLS rather than `columns` so the output keeps the canonical order.
    requested = [name for name in COLS if name in df.columns and name in columns]
    ordered = pinned + requested + [AutoEvalColumn.dummy.name]
    return df[ordered]
98
+
99
+
100
def filter_queries(query: str, filtered_df: pd.DataFrame):
    """Apply a ``;``-separated multi-term search to ``filtered_df``.

    Each non-blank term is looked up with ``search_table``. The per-term
    results are concatenated, and rows repeated across terms are dropped
    (duplicates are identified by model, precision and revision).

    Args:
        query: Raw search text; terms are separated by ``;``.
        filtered_df: Table to search.

    Returns:
        ``filtered_df`` unchanged when ``query`` has no non-blank term.
        Otherwise the combined matches, which is an empty table with the same
        columns when nothing matches. Previously a search with no hits fell
        through and returned every row.
    """
    terms = [term for term in (part.strip() for part in query.split(";")) if term]
    if not terms:
        return filtered_df

    matches = [search_table(filtered_df, term) for term in terms]
    matches = [match for match in matches if len(match) > 0]
    if not matches:
        # A real search with no hits must show no rows, not the whole table.
        return filtered_df.iloc[0:0]

    return pd.concat(matches).drop_duplicates(
        subset=[
            AutoEvalColumn.model.name,
            AutoEvalColumn.precision.name,
            AutoEvalColumn.revision.name,
        ]
    )
 
 
118
 
119
 
120
def filter_models(
    df: pd.DataFrame,
    type_query: list,
    size_query: list,
    precision_query: list,
    show_deleted: bool,
    show_merges: bool,
    show_flagged: bool,
) -> pd.DataFrame:
    """Filter the leaderboard by visibility toggles, type, precision and size.

    Every mask is computed from the frame it is applied to. The previous
    version built the type, precision and size masks from the unfiltered
    ``df``, which relied on index alignment and failed on duplicate index
    labels.

    Args:
        df: Table to filter; it is not modified in place.
        type_query: Selected model-type labels; only the first character of
            each label is compared with the model-type symbol column.
        size_query: Keys of ``NUMERIC_INTERVALS`` naming the accepted
            parameter-count intervals.
        precision_query: Accepted precision names; the string ``"None"`` is
            always accepted as well.
        show_deleted: When False, keep only rows whose still-on-hub column
            is True.
        show_merges: When False, keep only rows whose merged column is False.
        show_flagged: When False, keep only rows whose flagged column is
            False.

    Returns:
        The rows of ``df`` that pass every filter.
    """
    filtered_df = df
    if not show_deleted:
        filtered_df = filtered_df[filtered_df[AutoEvalColumn.still_on_hub.name].eq(True)]
    if not show_merges:
        filtered_df = filtered_df[filtered_df[AutoEvalColumn.merged.name].eq(False)]
    if not show_flagged:
        filtered_df = filtered_df[filtered_df[AutoEvalColumn.flagged.name].eq(False)]

    type_emoji = [t[0] for t in type_query]
    filtered_df = filtered_df[filtered_df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
    filtered_df = filtered_df[
        filtered_df[AutoEvalColumn.precision.name].isin([*precision_query, "None"])
    ]

    numeric_interval = pd.IntervalIndex(sorted(NUMERIC_INTERVALS[s] for s in size_query))
    # Values that cannot be parsed become NaN, which lies in no interval.
    params_column = pd.to_numeric(filtered_df[AutoEvalColumn.params.name], errors="coerce")
    # astype(bool) keeps the mask boolean even when no rows are left.
    mask = params_column.apply(lambda x: any(numeric_interval.contains(x))).astype(bool)
    return filtered_df.loc[mask]
144
+
145
+ leaderboard_df = filter_models(leaderboard_df, [t.to_str(" : ") for t in ModelType], list(NUMERIC_INTERVALS.keys()), [i.value.name for i in Precision], False, False, False)
146
+
147
  demo = gr.Blocks(css=custom_css)
148
  with demo:
149
  gr.HTML(TITLE)
 
151
 
152
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
153
  with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
154
+ with gr.Row():
155
+ with gr.Column():
156
+ with gr.Row():
157
+ search_bar = gr.Textbox(
158
+ placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
159
+ show_label=False,
160
+ elem_id="search-bar",
161
+ )
162
+ with gr.Row():
163
+ shown_columns = gr.CheckboxGroup(
164
+ choices=[
165
+ c.name
166
+ for c in fields(AutoEvalColumn)
167
+ if not c.hidden and not c.never_hidden and not c.dummy
168
+ ],
169
+ value=[
170
+ c.name
171
+ for c in fields(AutoEvalColumn)
172
+ if c.displayed_by_default and not c.hidden and not c.never_hidden
173
+ ],
174
+ label="Select columns to show",
175
+ elem_id="column-select",
176
+ interactive=True,
177
+ )
178
+ with gr.Row():
179
+ deleted_models_visibility = gr.Checkbox(
180
+ value=False, label="Show private/deleted models", interactive=True
181
+ )
182
+ merged_models_visibility = gr.Checkbox(
183
+ value=False, label="Show merges", interactive=True
184
+ )
185
+ flagged_models_visibility = gr.Checkbox(
186
+ value=False, label="Show flagged models", interactive=True
187
+ )
188
+ with gr.Column(min_width=320):
189
+ #with gr.Box(elem_id="box-filter"):
190
+ filter_columns_type = gr.CheckboxGroup(
191
+ label="Model types",
192
+ choices=[t.to_str() for t in ModelType],
193
+ value=[t.to_str() for t in ModelType],
194
+ interactive=True,
195
+ elem_id="filter-columns-type",
196
+ )
197
+ filter_columns_precision = gr.CheckboxGroup(
198
+ label="Precision",
199
+ choices=[i.value.name for i in Precision],
200
+ value=[i.value.name for i in Precision],
201
+ interactive=True,
202
+ elem_id="filter-columns-precision",
203
+ )
204
+ filter_columns_size = gr.CheckboxGroup(
205
+ label="Model sizes (in billions of parameters)",
206
+ choices=list(NUMERIC_INTERVALS.keys()),
207
+ value=list(NUMERIC_INTERVALS.keys()),
208
+ interactive=True,
209
+ elem_id="filter-columns-size",
210
+ )
211
+
212
+ leaderboard_table = gr.components.Dataframe(
213
+ value=leaderboard_df[
214
+ [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
215
+ + shown_columns.value
216
+ + [AutoEvalColumn.dummy.name]
217
+ ],
218
+ headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
219
+ datatype=TYPES,
220
+ elem_id="leaderboard-table",
221
+ interactive=False,
222
+ visible=True,
223
+ #column_widths=["2%", "33%"]
224
+ )
225
+
226
+ # Dummy leaderboard for handling the case when the user uses backspace key
227
+ hidden_leaderboard_table_for_search = gr.components.Dataframe(
228
+ value=original_df[COLS],
229
+ headers=COLS,
230
+ datatype=TYPES,
231
+ visible=False,
232
+ )
233
+ search_bar.submit(
234
+ update_table,
235
+ [
236
+ hidden_leaderboard_table_for_search,
237
+ shown_columns,
238
+ filter_columns_type,
239
+ filter_columns_precision,
240
+ filter_columns_size,
241
+ deleted_models_visibility,
242
+ merged_models_visibility,
243
+ flagged_models_visibility,
244
+ search_bar,
245
+ ],
246
+ leaderboard_table,
247
+ )
248
+
249
+ # Define a hidden component that will trigger a reload only if a query parameter has been set
250
+ hidden_search_bar = gr.Textbox(value="", visible=False)
251
+ hidden_search_bar.change(
252
+ update_table,
253
+ [
254
+ hidden_leaderboard_table_for_search,
255
+ shown_columns,
256
+ filter_columns_type,
257
+ filter_columns_precision,
258
+ filter_columns_size,
259
+ deleted_models_visibility,
260
+ merged_models_visibility,
261
+ flagged_models_visibility,
262
+ search_bar,
263
+ ],
264
+ leaderboard_table,
265
+ )
266
+ # Check query parameter once at startup and update search bar + hidden component
267
+ demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])
268
+
269
+ for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, deleted_models_visibility, merged_models_visibility, flagged_models_visibility]:
270
+ selector.change(
271
+ update_table,
272
+ [
273
+ hidden_leaderboard_table_for_search,
274
+ shown_columns,
275
+ filter_columns_type,
276
+ filter_columns_precision,
277
+ filter_columns_size,
278
+ deleted_models_visibility,
279
+ merged_models_visibility,
280
+ flagged_models_visibility,
281
+ search_bar,
282
+ ],
283
+ leaderboard_table,
284
+ queue=True,
285
+ )
286
 
287
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
288
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")