Commit b688574 by Roman Solomatin (1 parent: bd45af8)

base working

.pre-commit-config.yaml CHANGED
@@ -60,3 +60,10 @@ repos:
       - id: ruff-format
         types_or: [ python, pyi, jupyter ]
         args: [ --config, pyproject.toml ]
+
+  - repo: https://github.com/pdm-project/pdm
+    rev: 2.15.3
+    hooks:
+      - id: pdm-export
+        args: [ '-o', 'requirements.txt']
+        files: ^pdm.lock$
Makefile CHANGED
@@ -4,10 +4,11 @@
 
 style:
 	ruff format
-	pre-commit run --all-files
-
 
 quality:
 	ruff check
 
+pre-commit:
+	pre-commit run --all-files
+
 all: style quality
pdm.lock CHANGED
@@ -5,7 +5,7 @@
 groups = ["default", "lint"]
 strategy = ["cross_platform", "inherit_metadata"]
 lock_version = "4.4.1"
-content_hash = "sha256:ba1ca5b5cc998169567134133918478770dabd2af44598ba1f35371d0bb36083"
+content_hash = "sha256:66e66d639b37e39bcbe01ff1d2345c10ada9d3e8c19397250879b6aea903b4b3"
 
 [[package]]
 name = "aiofiles"
@@ -751,6 +751,26 @@ files = [
     {file = "pillow-10.3.0.tar.gz", hash = "sha256:9d2455fbf44c914840c793e89aa82d0e1763a14253a000743719ae5946814b2d"},
 ]
 
+[[package]]
+name = "pyarrow"
+version = "16.1.0"
+requires_python = ">=3.8"
+summary = "Python library for Apache Arrow"
+groups = ["default"]
+dependencies = [
+    "numpy>=1.16.6",
+]
+files = [
+    {file = "pyarrow-16.1.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9"},
+    {file = "pyarrow-16.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a"},
+    {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98100e0268d04e0eec47b73f20b39c45b4006f3c4233719c3848aa27a03c1aef"},
+    {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f68f409e7b283c085f2da014f9ef81e885d90dcd733bd648cfba3ef265961848"},
+    {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:a8914cd176f448e09746037b0c6b3a9d7688cef451ec5735094055116857580c"},
+    {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:48be160782c0556156d91adbdd5a4a7e719f8d407cb46ae3bb4eaee09b3111bd"},
+    {file = "pyarrow-16.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9cf389d444b0f41d9fe1444b70650fea31e9d52cfcb5f818b7888b91b586efff"},
+    {file = "pyarrow-16.1.0.tar.gz", hash = "sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315"},
+]
+
 [[package]]
 name = "pydantic"
 version = "2.7.4"
pyproject.toml CHANGED
@@ -24,6 +24,7 @@ dependencies = [
     # "lm-eval @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@b281b0921b636bc36ad05c0b0b0763bd6dd43463",
     # "accelerate",
     # "sentencepiece",
+    "pyarrow>=16.1.0",
 ]
 requires-python = "==3.10.*"
 readme = "README.md"
requirements.txt CHANGED
@@ -265,6 +265,15 @@ pillow==10.3.0 \
     --hash=sha256:d93480005693d247f8346bc8ee28c72a2191bdf1f6b5db469c096c0c867ac015 \
     --hash=sha256:dd78700f5788ae180b5ee8902c6aea5a5726bac7c364b202b4b3e3ba2d293170 \
     --hash=sha256:f0d0591a0aeaefdaf9a5e545e7485f89910c977087e7de2b6c388aec32011e9f
+pyarrow==16.1.0 \
+    --hash=sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315 \
+    --hash=sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9 \
+    --hash=sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a \
+    --hash=sha256:48be160782c0556156d91adbdd5a4a7e719f8d407cb46ae3bb4eaee09b3111bd \
+    --hash=sha256:98100e0268d04e0eec47b73f20b39c45b4006f3c4233719c3848aa27a03c1aef \
+    --hash=sha256:9cf389d444b0f41d9fe1444b70650fea31e9d52cfcb5f818b7888b91b586efff \
+    --hash=sha256:a8914cd176f448e09746037b0c6b3a9d7688cef451ec5735094055116857580c \
+    --hash=sha256:f68f409e7b283c085f2da014f9ef81e885d90dcd733bd648cfba3ef265961848
 pydantic==2.7.4 \
     --hash=sha256:0c84efd9548d545f63ac0060c1e4d39bb9b14db8b3c0652338aecc07b5adec52 \
     --hash=sha256:ee8538d41ccb9c0a9ad3e0e5f07bf15ed8015b481ced539a1759d8cc89ae90d0
src/encodechka/app.py CHANGED
@@ -1,9 +1,6 @@
 import gradio as gr
 import pandas as pd
 from about import (
-    CITATION_BUTTON_LABEL,
-    CITATION_BUTTON_TEXT,
-    EVALUATION_QUEUE_TEXT,
     INTRODUCTION_TEXT,
     LLM_BENCHMARKS_TEXT,
     TITLE,
@@ -14,13 +11,11 @@ from display.utils import (
     BENCHMARK_COLS,
     COLS,
     EVAL_COLS,
-    EVAL_TYPES,
     NUMERIC_INTERVALS,
     TYPES,
     AutoEvalColumn,
     ModelType,
     Precision,
-    WeightType,
     fields,
 )
 from envs import (
@@ -67,7 +62,6 @@ try:
 except Exception:
     restart_space()
 
-
 raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 leaderboard_df = original_df.copy()
 
@@ -156,100 +150,83 @@ def filter_models(
     return filtered_df
 
 
-demo = gr.Blocks(css=custom_css)
-with demo:
-    gr.HTML(TITLE)
-    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-    with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            with gr.Row():
-                with gr.Column():
-                    with gr.Row():
-                        search_bar = gr.Textbox(
-                            placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
-                            show_label=False,
-                            elem_id="search-bar",
-                        )
-                    with gr.Row():
-                        shown_columns = gr.CheckboxGroup(
-                            choices=[c.name for c in fields(AutoEvalColumn) if not c.hidden and not c.never_hidden],
-                            value=[
-                                c.name
-                                for c in fields(AutoEvalColumn)
-                                if c.displayed_by_default and not c.hidden and not c.never_hidden
-                            ],
-                            label="Select columns to show",
-                            elem_id="column-select",
-                            interactive=True,
-                        )
-                    with gr.Row():
-                        deleted_models_visibility = gr.Checkbox(
-                            value=False,
-                            label="Show gated/private/deleted models",
-                            interactive=True,
-                        )
-                with gr.Column(min_width=320):
-                    # with gr.Box(elem_id="box-filter"):
-                    filter_columns_type = gr.CheckboxGroup(
-                        label="Model types",
-                        choices=[t.to_str() for t in ModelType],
-                        value=[t.to_str() for t in ModelType],
-                        interactive=True,
-                        elem_id="filter-columns-type",
-                    )
-                    filter_columns_precision = gr.CheckboxGroup(
-                        label="Precision",
-                        choices=[i.value.name for i in Precision],
-                        value=[i.value.name for i in Precision],
-                        interactive=True,
-                        elem_id="filter-columns-precision",
-                    )
-                    filter_columns_size = gr.CheckboxGroup(
-                        label="Model sizes (in billions of parameters)",
-                        choices=list(NUMERIC_INTERVALS.keys()),
-                        value=list(NUMERIC_INTERVALS.keys()),
-                        interactive=True,
-                        elem_id="filter-columns-size",
-                    )
-
-            leaderboard_table = gr.components.Dataframe(
-                value=leaderboard_df[[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value],
-                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
-                datatype=TYPES,
-                elem_id="leaderboard-table",
-                interactive=False,
-                visible=True,
-            )
-
-            # Dummy leaderboard for handling the case when the user uses backspace key
-            hidden_leaderboard_table_for_search = gr.components.Dataframe(
-                value=original_df[COLS],
-                headers=COLS,
-                datatype=TYPES,
-                visible=False,
-            )
-            search_bar.submit(
-                update_table,
-                [
-                    hidden_leaderboard_table_for_search,
-                    shown_columns,
-                    filter_columns_type,
-                    filter_columns_precision,
-                    filter_columns_size,
-                    deleted_models_visibility,
-                    search_bar,
-                ],
-                leaderboard_table,
-            )
-            for selector in [
-                shown_columns,
-                filter_columns_type,
-                filter_columns_precision,
-                filter_columns_size,
-                deleted_models_visibility,
-            ]:
-                selector.change(
+def build_app() -> gr.Blocks:
+    with gr.Blocks(css=custom_css) as app:
+        gr.HTML(TITLE)
+        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+        with gr.Tabs(elem_classes="tab-buttons"):
+            with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+                with gr.Row():
+                    with gr.Column():
+                        with gr.Row():
+                            search_bar = gr.Textbox(
+                                placeholder=" 🔍 Search for your model (separate multiple queries with `;`) "
+                                "and press ENTER...",
+                                show_label=False,
+                                elem_id="search-bar",
+                            )
+                        with gr.Row():
+                            shown_columns = gr.CheckboxGroup(
+                                choices=[c.name for c in fields(AutoEvalColumn) if not c.hidden and not c.never_hidden],
+                                value=[
+                                    c.name
+                                    for c in fields(AutoEvalColumn)
+                                    if c.displayed_by_default and not c.hidden and not c.never_hidden
+                                ],
+                                label="Select columns to show",
+                                elem_id="column-select",
+                                interactive=True,
+                            )
+                        with gr.Row():
+                            deleted_models_visibility = gr.Checkbox(
+                                value=False,
+                                label="Show gated/private/deleted models",
+                                interactive=True,
+                            )
+                    with gr.Column(min_width=320):
+                        # with gr.Box(elem_id="box-filter"):
+                        filter_columns_type = gr.CheckboxGroup(
+                            label="Model types",
+                            choices=[t.to_str() for t in ModelType],
+                            value=[t.to_str() for t in ModelType],
+                            interactive=True,
+                            elem_id="filter-columns-type",
+                        )
+                        filter_columns_precision = gr.CheckboxGroup(
+                            label="Precision",
+                            choices=[i.value.name for i in Precision],
+                            value=[i.value.name for i in Precision],
+                            interactive=True,
+                            elem_id="filter-columns-precision",
+                        )
+                        filter_columns_size = gr.CheckboxGroup(
+                            label="Model sizes (in billions of parameters)",
+                            choices=list(NUMERIC_INTERVALS.keys()),
+                            value=list(NUMERIC_INTERVALS.keys()),
+                            interactive=True,
+                            elem_id="filter-columns-size",
+                        )
+
+                leaderboard_table = gr.components.Dataframe(
+                    value=leaderboard_df[
+                        [c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value
+                    ],
+                    headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
+                    datatype=TYPES,
+                    elem_id="leaderboard-table",
+                    interactive=False,
+                    visible=True,
+                )
+
+                # Dummy leaderboard for handling the case when the user uses backspace key
+                hidden_leaderboard_table_for_search = gr.components.Dataframe(
+                    value=original_df[COLS],
+                    headers=COLS,
+                    datatype=TYPES,
+                    visible=False,
+                )
+                search_bar.submit(
                     update_table,
                     [
                         hidden_leaderboard_table_for_search,
@@ -261,110 +238,138 @@ with demo:
                         search_bar,
                     ],
                     leaderboard_table,
-                    queue=True,
-                )
-
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            with gr.Column():
-                with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                with gr.Column():
-                    with gr.Accordion(
-                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-
-                    with gr.Accordion(
-                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-            with gr.Row():
-                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value.name for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-                    # submit_button = gr.Button("Submit Eval")
-                    # submission_result = gr.Markdown()
-                    # submit_button.click(
-                    #     add_new_eval,
-                    #     [
-                    #         model_name_textbox,
-                    #         base_model_name_textbox,
-                    #         revision_name_textbox,
-                    #         precision,
-                    #         weight_type,
-                    #         model_type,
-                    #     ],
-                    #     submission_result,
-                    # )
-
-    with gr.Row():
-        with gr.Accordion("📙 Citation", open=False):
-            citation_button = gr.Textbox(
-                value=CITATION_BUTTON_TEXT,
-                label=CITATION_BUTTON_LABEL,
-                lines=20,
-                elem_id="citation-button",
-                show_copy_button=True,
-            )
-
-scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=1800)
-scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+                )
+                for selector in [
+                    shown_columns,
+                    filter_columns_type,
+                    filter_columns_precision,
+                    filter_columns_size,
+                    deleted_models_visibility,
+                ]:
+                    selector.change(
+                        update_table,
+                        [
+                            hidden_leaderboard_table_for_search,
+                            shown_columns,
+                            filter_columns_type,
+                            filter_columns_precision,
+                            filter_columns_size,
+                            deleted_models_visibility,
+                            search_bar,
+                        ],
+                        leaderboard_table,
+                        queue=True,
+                    )
+
+            with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
+                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+            # with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
+            #     with gr.Column():
+            #         with gr.Row():
+            #             gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+            #
+            #         with gr.Column():
+            #             with gr.Accordion(
+            #                 f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+            #                 open=False,
+            #             ):
+            #                 with gr.Row():
+            #                     finished_eval_table = gr.components.Dataframe(
+            #                         value=finished_eval_queue_df,
+            #                         headers=EVAL_COLS,
+            #                         datatype=EVAL_TYPES,
+            #                         row_count=5,
+            #                     )
+            #             with gr.Accordion(
+            #                 f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
+            #                 open=False,
+            #             ):
+            #                 with gr.Row():
+            #                     running_eval_table = gr.components.Dataframe(
+            #                         value=running_eval_queue_df,
+            #                         headers=EVAL_COLS,
+            #                         datatype=EVAL_TYPES,
+            #                         row_count=5,
+            #                     )
+            #
+            #             with gr.Accordion(
+            #                 f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+            #                 open=False,
+            #             ):
+            #                 with gr.Row():
+            #                     pending_eval_table = gr.components.Dataframe(
+            #                         value=pending_eval_queue_df,
+            #                         headers=EVAL_COLS,
+            #                         datatype=EVAL_TYPES,
+            #                         row_count=5,
+            #                     )
+            #     with gr.Row():
+            #         gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+            #
+            #     with gr.Row():
+            #         with gr.Column():
+            #             model_name_textbox = gr.Textbox(label="Model name")
+            #             revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+            #             model_type = gr.Dropdown(
+            #                 choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+            #                 label="Model type",
+            #                 multiselect=False,
+            #                 value=None,
+            #                 interactive=True,
+            #             )
+            #
+            #         with gr.Column():
+            #             precision = gr.Dropdown(
+            #                 choices=[i.value.name for i in Precision if i != Precision.Unknown],
+            #                 label="Precision",
+            #                 multiselect=False,
+            #                 value="float16",
+            #                 interactive=True,
+            #             )
+            #             weight_type = gr.Dropdown(
+            #                 choices=[i.value.name for i in WeightType],
+            #                 label="Weights type",
+            #                 multiselect=False,
+            #                 value="Original",
+            #                 interactive=True,
+            #             )
+            #             base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+            #
+            #             submit_button = gr.Button("Submit Eval")
+            #             submission_result = gr.Markdown()
+            #             submit_button.click(
+            #                 add_new_eval,
+            #                 [
+            #                     model_name_textbox,
+            #                     base_model_name_textbox,
+            #                     revision_name_textbox,
+            #                     precision,
+            #                     weight_type,
+            #                     model_type,
+            #                 ],
+            #                 submission_result,
+            #             )
+            #
+            #     with gr.Row():
+            #         with gr.Accordion("📙 Citation", open=False):
+            #             citation_button = gr.Textbox(
+            #                 value=CITATION_BUTTON_TEXT,
+            #                 label=CITATION_BUTTON_LABEL,
+            #                 lines=20,
+            #                 elem_id="citation-button",
+            #                 show_copy_button=True,
+            #             )
+    return app
+
+
+def main():
+    app = build_app()
+    scheduler = BackgroundScheduler()
+    scheduler.add_job(restart_space, "interval", seconds=1800)
+    scheduler.start()
+    app.queue(default_concurrency_limit=40).launch()
+
+
+if __name__ == "__main__":
+    main()
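
Note: app.py now exposes build_app() and main() instead of building the Blocks object at module level. A minimal local smoke test of the new entry point could look like the sketch below; running it from src/encodechka (so the flat imports in app.py resolve) and the share=False flag are assumptions, not part of the commit.

    # Hypothetical local smoke test for the refactored entry point.
    # Assumes the working directory is src/encodechka so `from app import build_app` resolves.
    from app import build_app

    demo = build_app()                                # build the UI without starting the restart scheduler
    demo.queue(default_concurrency_limit=40).launch(  # same queue setting used by main()
        share=False,                                  # assumption: no public share link needed locally
    )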
src/encodechka/display/formatting.py CHANGED
@@ -1,5 +1,8 @@
 def model_hyperlink(link, model_name):
-    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
+    return (
+        f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;'
+        f'text-decoration-style: dotted;">{model_name}</a>'
+    )
 
 
 def make_clickable_model(model_name):
src/encodechka/display/utils.py CHANGED
@@ -2,8 +2,7 @@ from dataclasses import dataclass, make_dataclass
 from enum import Enum
 
 import pandas as pd
-
-from ..about import Tasks
+from about import Tasks
 
 
 def fields(raw_class):
@@ -23,42 +22,38 @@
 
 
 ## Leaderboard columns
-auto_eval_column_dict = []
-# Init
-auto_eval_column_dict.append(
-    [
+auto_eval_column_dict = [
+    (
         "model_type_symbol",
         ColumnContent,
         ColumnContent("T", "str", True, never_hidden=True),
-    ]
-)
-auto_eval_column_dict.append(
-    [
+    ),
+    (
         "model",
         ColumnContent,
         ColumnContent("Model", "markdown", True, never_hidden=True),
-    ]
-)
+    ),
+]
 # Scores
-auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
+auto_eval_column_dict.append(("average", ColumnContent, ColumnContent("Average ⬆️", "number", True)))
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 # Model information
-auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
-auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
-auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
-auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
-auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
+auto_eval_column_dict.append(("model_type", ColumnContent, ColumnContent("Type", "str", False)))
+auto_eval_column_dict.append(("architecture", ColumnContent, ColumnContent("Architecture", "str", False)))
+auto_eval_column_dict.append(("weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)))
+auto_eval_column_dict.append(("precision", ColumnContent, ColumnContent("Precision", "str", False)))
+auto_eval_column_dict.append(("license", ColumnContent, ColumnContent("Hub License", "str", False)))
+auto_eval_column_dict.append(("params", ColumnContent, ColumnContent("#Params (B)", "number", False)))
+auto_eval_column_dict.append(("likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)))
 auto_eval_column_dict.append(
-    [
+    (
         "still_on_hub",
         ColumnContent,
         ColumnContent("Available on the hub", "bool", False),
-    ]
+    )
 )
-auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+auto_eval_column_dict.append(("revision", ColumnContent, ColumnContent("Model sha", "str", False, False)))
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
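
Note: the column list above is consumed by dataclasses.make_dataclass, which treats each (name, type, default) entry as a field whose default value is the ColumnContent instance carrying the display metadata. A self-contained toy sketch of the pattern is below; the simplified ToyColumnContent signature and the use of dataclasses.fields in place of the project's own fields() helper are assumptions for illustration only.

    from dataclasses import dataclass, fields as dc_fields, make_dataclass


    @dataclass(frozen=True)
    class ToyColumnContent:  # simplified stand-in for display.utils.ColumnContent
        name: str
        type: str
        displayed_by_default: bool
        hidden: bool = False
        never_hidden: bool = False


    # Each entry is (attribute name, annotation, default); the default carries the metadata.
    columns = [
        ("model", ToyColumnContent, ToyColumnContent("Model", "markdown", True, never_hidden=True)),
        ("average", ToyColumnContent, ToyColumnContent("Average ⬆️", "number", True)),
        ("license", ToyColumnContent, ToyColumnContent("Hub License", "str", False)),
    ]
    ToyAutoEvalColumn = make_dataclass("ToyAutoEvalColumn", columns, frozen=True)

    # Iterate over the field defaults, e.g. to collect the columns shown by default.
    shown = [f.default.name for f in dc_fields(ToyAutoEvalColumn) if f.default.displayed_by_default]
    print(shown)  # ['Model', 'Average ⬆️']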
src/encodechka/envs.py CHANGED
@@ -6,7 +6,7 @@ from huggingface_hub import HfApi
 # ----------------------------------
 TOKEN = os.environ.get("TOKEN")  # A read/write token for your org
 
-OWNER = "demo-leaderboard-backend"  # Change to your org - don't forget to create a results and request dataset, with the correct format!
+OWNER = "demo-leaderboard-backend"
 # ----------------------------------
 
 REPO_ID = f"{OWNER}/leaderboard"
src/encodechka/leaderboard/read_evals.py CHANGED
@@ -5,10 +5,8 @@ from dataclasses import dataclass
 
 import dateutil
 import numpy as np
-
-from ..display.formatting import make_clickable_model
-from ..display.utils import AutoEvalColumn, ModelType, Precision, Tasks, WeightType
-from ..submission.check_validity import is_model_on_hub
+from display.formatting import make_clickable_model
+from display.utils import AutoEvalColumn, ModelType, Precision, Tasks, WeightType
 
 
 @dataclass
@@ -56,17 +54,17 @@ class EvalResult:
         result_key = f"{org}_{model}_{precision.value.name}"
         full_model = "/".join(org_and_model)
 
-        still_on_hub, _, model_config = is_model_on_hub(
-            full_model,
-            config.get("model_sha", "main"),
-            trust_remote_code=True,
-            test_tokenizer=False,
-        )
-        architecture = "?"
-        if model_config is not None:
-            architectures = getattr(model_config, "architectures", None)
-            if architectures:
-                architecture = ";".join(architectures)
+        # still_on_hub, _, model_config = is_model_on_hub(
+        #     full_model,
+        #     config.get("model_sha", "main"),
+        #     trust_remote_code=True,
+        #     test_tokenizer=False,
+        # )
+        # architecture = "?"
+        # if model_config is not None:
+        #     architectures = getattr(model_config, "architectures", None)
+        #     if architectures:
+        #         architecture = ";".join(architectures)
 
         # Extract results available in this file (some results are split in several files)
         results = {}
@@ -89,8 +87,8 @@
             results=results,
             precision=precision,
             revision=config.get("model_sha", ""),
-            still_on_hub=still_on_hub,
-            architecture=architecture,
+            # still_on_hub=still_on_hub,
+            # architecture=architecture,
         )
 
     def update_with_request_file(self, requests_path):
src/encodechka/populate.py CHANGED
@@ -1,13 +1,16 @@
 import json
 import os
+from typing import Any
 
 import pandas as pd
 from display.formatting import has_no_nan_values, make_clickable_model
 from display.utils import AutoEvalColumn, EvalQueueColumn
-from leaderboard.read_evals import get_raw_eval_results
+from leaderboard.read_evals import EvalResult, get_raw_eval_results
 
 
-def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
+def get_leaderboard_df(
+    results_path: str, requests_path: str, cols: list, benchmark_cols: list
+) -> tuple[list[EvalResult], Any]:
     """Creates a dataframe from all the individual experiment results"""
     raw_data = get_raw_eval_results(results_path, requests_path)
     all_data_json = [v.to_dict() for v in raw_data]
@@ -21,7 +24,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     return raw_data, df
 
 
-def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
+def get_evaluation_queue_df(save_path: str, cols: list) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
     """Creates the different dataframes for the evaluation queues requestes"""
     entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
     all_evals = []
src/encodechka/submission/check_validity.py CHANGED
@@ -34,56 +34,63 @@
 # return True, ""
 #
 #
-# def is_model_on_hub(
-#     model_name: str,
-#     revision: str,
-#     token: str | None = None,
-#     trust_remote_code=False,
-#     test_tokenizer=False,
-# ) -> tuple[bool, str]:
-#     """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
-#     try:
-#         config = AutoConfig.from_pretrained(
-#             model_name,
-#             revision=revision,
-#             trust_remote_code=trust_remote_code,
-#             token=token,
-#         )
-#         if test_tokenizer:
-#             try:
-#                 tk = AutoTokenizer.from_pretrained(
-#                     model_name,
-#                     revision=revision,
-#                     trust_remote_code=trust_remote_code,
-#                     token=token,
-#                 )
-#             except ValueError as e:
-#                 return (
-#                     False,
-#                     f"uses a tokenizer which is not in a transformers release: {e}",
-#                     None,
-#                 )
-#             except Exception:
-#                 return (
-#                     False,
-#                     "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?",
-#                     None,
-#                 )
-#         return True, None, config
-#
-#     except ValueError:
-#         return (
-#             False,
-#             "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
-#             None,
-#         )
-#
-#     except Exception:
-#         return False, "was not found on hub!", None
+def is_model_on_hub(
+    model_name: str,
+    revision: str,
+    token: str | None = None,
+    trust_remote_code=False,
+    test_tokenizer=False,
+) -> tuple[bool, str]:
+    """Checks if the model model_name is on the hub,
+    and whether it (and its tokenizer) can be loaded with AutoClasses."""
+    raise NotImplementedError("Replace with huggingface_hub API")
+    # try:
+    #     config = AutoConfig.from_pretrained(
+    #         model_name,
+    #         revision=revision,
+    #         trust_remote_code=trust_remote_code,
+    #         token=token,
+    #     )
+    #     if test_tokenizer:
+    #         try:
+    #             tk = AutoTokenizer.from_pretrained(
+    #                 model_name,
+    #                 revision=revision,
+    #                 trust_remote_code=trust_remote_code,
+    #                 token=token,
+    #             )
+    #         except ValueError as e:
+    #             return (
+    #                 False,
+    #                 f"uses a tokenizer which is not in a transformers release: {e}",
+    #                 None,
+    #             )
+    #         except Exception:
+    #             return (
+    #                 False,
+    #                 "'s tokenizer cannot be loaded. Is your tokenizer class in a
+    # stable transformers release, and correctly configured?",
+    #                 None,
+    #             )
+    #         return True, None, config
+    #
+    #     except ValueError:
+    #         return (
+    #             False,
+    #             "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow
+    # these models to be automatically submitted to the leaderboard.",
+    #             None,
+    #         )
+    #
+    #     except Exception:
+    #         return False, "was not found on hub!", None
+
+
 #
 #
 # def get_model_size(model_info: ModelInfo, precision: str):
-#     """Gets the model size from the configuration, or the model name if the configuration does not contain the information."""
+#     """Gets the model size from the configuration, or the model name if the
+#     configuration does not contain the information."""
 #     try:
 #         model_size = round(model_info.safetensors["total"] / 1e9, 3)
 #     except (AttributeError, TypeError):
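
Note: is_model_on_hub is left as a stub that raises NotImplementedError("Replace with huggingface_hub API"). One possible shape for that replacement is sketched below; the function name, the specific exceptions handled, and the three-element return mirroring the commented-out transformers version are assumptions, not part of this commit.

    from huggingface_hub import HfApi
    from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError


    def is_model_on_hub_sketch(model_name: str, revision: str, token: str | None = None):
        """Check hub availability via the HTTP API instead of loading AutoConfig/AutoTokenizer."""
        try:
            info = HfApi(token=token).model_info(model_name, revision=revision)
        except (RepositoryNotFoundError, RevisionNotFoundError):
            return False, "was not found on hub!", None
        return True, "", info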
src/encodechka/submission/submit.py CHANGED
@@ -53,7 +53,9 @@
 #         return styled_error(f'Base model "{base_model}" {error}')
 #
 #     if not weight_type == "Adapter":
-#         model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
+#         model_on_hub, error, _ = is_model_on_hub(
+#             model_name=model, revision=revision, token=TOKEN, test_tokenizer=True
+#         )
 #         if not model_on_hub:
 #             return styled_error(f'Model "{model}" {error}')
 #
@@ -118,5 +120,6 @@
 #     os.remove(out_path)
 #
 #     return styled_message(
-#         "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
+#         "Your request has been submitted to the evaluation queue!\n
+# Please wait for up to an hour for the model to show in the PENDING list."
 #     )