Commit b688574 by Roman Solomatin (1 parent: bd45af8)

base working

.pre-commit-config.yaml CHANGED
@@ -60,3 +60,10 @@ repos:
       - id: ruff-format
         types_or: [ python, pyi, jupyter ]
         args: [ --config, pyproject.toml ]
+
+  - repo: https://github.com/pdm-project/pdm
+    rev: 2.15.3
+    hooks:
+      - id: pdm-export
+        args: [ '-o', 'requirements.txt']
+        files: ^pdm.lock$
Makefile CHANGED
@@ -4,10 +4,11 @@
 
 style:
 	ruff format
-	pre-commit run --all-files
-
 
 quality:
 	ruff check
 
+pre-commit:
+	pre-commit run --all-files
+
 all: style quality
pdm.lock CHANGED
@@ -5,7 +5,7 @@
 groups = ["default", "lint"]
 strategy = ["cross_platform", "inherit_metadata"]
 lock_version = "4.4.1"
-content_hash = "sha256:ba1ca5b5cc998169567134133918478770dabd2af44598ba1f35371d0bb36083"
+content_hash = "sha256:66e66d639b37e39bcbe01ff1d2345c10ada9d3e8c19397250879b6aea903b4b3"
 
 [[package]]
 name = "aiofiles"
@@ -751,6 +751,26 @@ files = [
     {file = "pillow-10.3.0.tar.gz", hash = "sha256:9d2455fbf44c914840c793e89aa82d0e1763a14253a000743719ae5946814b2d"},
 ]
 
+[[package]]
+name = "pyarrow"
+version = "16.1.0"
+requires_python = ">=3.8"
+summary = "Python library for Apache Arrow"
+groups = ["default"]
+dependencies = [
+    "numpy>=1.16.6",
+]
+files = [
+    {file = "pyarrow-16.1.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9"},
+    {file = "pyarrow-16.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a"},
+    {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98100e0268d04e0eec47b73f20b39c45b4006f3c4233719c3848aa27a03c1aef"},
+    {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f68f409e7b283c085f2da014f9ef81e885d90dcd733bd648cfba3ef265961848"},
+    {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:a8914cd176f448e09746037b0c6b3a9d7688cef451ec5735094055116857580c"},
+    {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:48be160782c0556156d91adbdd5a4a7e719f8d407cb46ae3bb4eaee09b3111bd"},
+    {file = "pyarrow-16.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9cf389d444b0f41d9fe1444b70650fea31e9d52cfcb5f818b7888b91b586efff"},
+    {file = "pyarrow-16.1.0.tar.gz", hash = "sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315"},
+]
+
 [[package]]
 name = "pydantic"
 version = "2.7.4"
pyproject.toml CHANGED
@@ -24,6 +24,7 @@ dependencies = [
     # "lm-eval @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@b281b0921b636bc36ad05c0b0b0763bd6dd43463",
     # "accelerate",
     # "sentencepiece",
+    "pyarrow>=16.1.0",
 ]
 requires-python = "==3.10.*"
 readme = "README.md"
requirements.txt CHANGED
@@ -265,6 +265,15 @@ pillow==10.3.0 \
     --hash=sha256:d93480005693d247f8346bc8ee28c72a2191bdf1f6b5db469c096c0c867ac015 \
     --hash=sha256:dd78700f5788ae180b5ee8902c6aea5a5726bac7c364b202b4b3e3ba2d293170 \
     --hash=sha256:f0d0591a0aeaefdaf9a5e545e7485f89910c977087e7de2b6c388aec32011e9f
+pyarrow==16.1.0 \
+    --hash=sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315 \
+    --hash=sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9 \
+    --hash=sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a \
+    --hash=sha256:48be160782c0556156d91adbdd5a4a7e719f8d407cb46ae3bb4eaee09b3111bd \
+    --hash=sha256:98100e0268d04e0eec47b73f20b39c45b4006f3c4233719c3848aa27a03c1aef \
+    --hash=sha256:9cf389d444b0f41d9fe1444b70650fea31e9d52cfcb5f818b7888b91b586efff \
+    --hash=sha256:a8914cd176f448e09746037b0c6b3a9d7688cef451ec5735094055116857580c \
+    --hash=sha256:f68f409e7b283c085f2da014f9ef81e885d90dcd733bd648cfba3ef265961848
 pydantic==2.7.4 \
     --hash=sha256:0c84efd9548d545f63ac0060c1e4d39bb9b14db8b3c0652338aecc07b5adec52 \
     --hash=sha256:ee8538d41ccb9c0a9ad3e0e5f07bf15ed8015b481ced539a1759d8cc89ae90d0
src/encodechka/app.py CHANGED
@@ -1,9 +1,6 @@
 import gradio as gr
 import pandas as pd
 from about import (
-    CITATION_BUTTON_LABEL,
-    CITATION_BUTTON_TEXT,
-    EVALUATION_QUEUE_TEXT,
     INTRODUCTION_TEXT,
     LLM_BENCHMARKS_TEXT,
     TITLE,
@@ -14,13 +11,11 @@ from display.utils import (
     BENCHMARK_COLS,
     COLS,
     EVAL_COLS,
-    EVAL_TYPES,
     NUMERIC_INTERVALS,
     TYPES,
     AutoEvalColumn,
     ModelType,
     Precision,
-    WeightType,
     fields,
 )
 from envs import (
@@ -67,7 +62,6 @@ try:
 except Exception:
     restart_space()
 
-
 raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 leaderboard_df = original_df.copy()
 
@@ -156,100 +150,83 @@ def filter_models(
     return filtered_df
 
 
-demo = gr.Blocks(css=custom_css)
-with demo:
-    gr.HTML(TITLE)
-    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-    with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            with gr.Row():
-                with gr.Column():
-                    with gr.Row():
-                        search_bar = gr.Textbox(
-                            placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
-                            show_label=False,
-                            elem_id="search-bar",
-                        )
-                    with gr.Row():
-                        shown_columns = gr.CheckboxGroup(
-                            choices=[c.name for c in fields(AutoEvalColumn) if not c.hidden and not c.never_hidden],
-                            value=[
-                                c.name
-                                for c in fields(AutoEvalColumn)
-                                if c.displayed_by_default and not c.hidden and not c.never_hidden
-                            ],
-                            label="Select columns to show",
-                            elem_id="column-select",
-                            interactive=True,
-                        )
-                    with gr.Row():
-                        deleted_models_visibility = gr.Checkbox(
-                            value=False,
-                            label="Show gated/private/deleted models",
-                            interactive=True,
-                        )
-                with gr.Column(min_width=320):
-                    # with gr.Box(elem_id="box-filter"):
-                    filter_columns_type = gr.CheckboxGroup(
-                        label="Model types",
-                        choices=[t.to_str() for t in ModelType],
-                        value=[t.to_str() for t in ModelType],
-                        interactive=True,
-                        elem_id="filter-columns-type",
-                    )
-                    filter_columns_precision = gr.CheckboxGroup(
-                        label="Precision",
-                        choices=[i.value.name for i in Precision],
-                        value=[i.value.name for i in Precision],
-                        interactive=True,
-                        elem_id="filter-columns-precision",
-                    )
-                    filter_columns_size = gr.CheckboxGroup(
-                        label="Model sizes (in billions of parameters)",
-                        choices=list(NUMERIC_INTERVALS.keys()),
-                        value=list(NUMERIC_INTERVALS.keys()),
-                        interactive=True,
-                        elem_id="filter-columns-size",
-                    )
-
-            leaderboard_table = gr.components.Dataframe(
-                value=leaderboard_df[[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value],
-                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
-                datatype=TYPES,
-                elem_id="leaderboard-table",
-                interactive=False,
-                visible=True,
-            )
-
-            # Dummy leaderboard for handling the case when the user uses backspace key
-            hidden_leaderboard_table_for_search = gr.components.Dataframe(
-                value=original_df[COLS],
-                headers=COLS,
-                datatype=TYPES,
-                visible=False,
-            )
-            search_bar.submit(
-                update_table,
-                [
-                    hidden_leaderboard_table_for_search,
-                    shown_columns,
-                    filter_columns_type,
-                    filter_columns_precision,
-                    filter_columns_size,
-                    deleted_models_visibility,
-                    search_bar,
-                ],
-                leaderboard_table,
-            )
-            for selector in [
-                shown_columns,
-                filter_columns_type,
-                filter_columns_precision,
-                filter_columns_size,
-                deleted_models_visibility,
-            ]:
-                selector.change(
+def build_app() -> gr.Blocks:
+    with gr.Blocks(css=custom_css) as app:
+        gr.HTML(TITLE)
+        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+        with gr.Tabs(elem_classes="tab-buttons"):
+            with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+                with gr.Row():
+                    with gr.Column():
+                        with gr.Row():
+                            search_bar = gr.Textbox(
+                                placeholder=" 🔍 Search for your model (separate multiple queries with `;`) "
+                                "and press ENTER...",
+                                show_label=False,
+                                elem_id="search-bar",
+                            )
+                        with gr.Row():
+                            shown_columns = gr.CheckboxGroup(
+                                choices=[c.name for c in fields(AutoEvalColumn) if not c.hidden and not c.never_hidden],
+                                value=[
+                                    c.name
+                                    for c in fields(AutoEvalColumn)
+                                    if c.displayed_by_default and not c.hidden and not c.never_hidden
+                                ],
+                                label="Select columns to show",
+                                elem_id="column-select",
+                                interactive=True,
+                            )
+                        with gr.Row():
+                            deleted_models_visibility = gr.Checkbox(
+                                value=False,
+                                label="Show gated/private/deleted models",
+                                interactive=True,
+                            )
+                    with gr.Column(min_width=320):
+                        # with gr.Box(elem_id="box-filter"):
+                        filter_columns_type = gr.CheckboxGroup(
+                            label="Model types",
+                            choices=[t.to_str() for t in ModelType],
+                            value=[t.to_str() for t in ModelType],
+                            interactive=True,
+                            elem_id="filter-columns-type",
+                        )
+                        filter_columns_precision = gr.CheckboxGroup(
+                            label="Precision",
+                            choices=[i.value.name for i in Precision],
+                            value=[i.value.name for i in Precision],
+                            interactive=True,
+                            elem_id="filter-columns-precision",
+                        )
+                        filter_columns_size = gr.CheckboxGroup(
+                            label="Model sizes (in billions of parameters)",
+                            choices=list(NUMERIC_INTERVALS.keys()),
+                            value=list(NUMERIC_INTERVALS.keys()),
+                            interactive=True,
+                            elem_id="filter-columns-size",
+                        )
+
+                leaderboard_table = gr.components.Dataframe(
+                    value=leaderboard_df[
+                        [c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value
+                    ],
+                    headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
+                    datatype=TYPES,
+                    elem_id="leaderboard-table",
+                    interactive=False,
+                    visible=True,
+                )
+
+                # Dummy leaderboard for handling the case when the user uses backspace key
+                hidden_leaderboard_table_for_search = gr.components.Dataframe(
+                    value=original_df[COLS],
+                    headers=COLS,
+                    datatype=TYPES,
+                    visible=False,
+                )
+                search_bar.submit(
                     update_table,
                     [
                         hidden_leaderboard_table_for_search,
@@ -261,110 +238,138 @@ with demo:
                         search_bar,
                     ],
                     leaderboard_table,
-                    queue=True,
-                )
-
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            with gr.Column():
-                with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                with gr.Column():
-                    with gr.Accordion(
-                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-
-                    with gr.Accordion(
-                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-            with gr.Row():
-                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value.name for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-                    # submit_button = gr.Button("Submit Eval")
-                    # submission_result = gr.Markdown()
-                    # submit_button.click(
-                    #     add_new_eval,
-                    #     [
-                    #         model_name_textbox,
-                    #         base_model_name_textbox,
-                    #         revision_name_textbox,
-                    #         precision,
-                    #         weight_type,
-                    #         model_type,
-                    #     ],
-                    #     submission_result,
-                    # )
-
-    with gr.Row():
-        with gr.Accordion("📙 Citation", open=False):
-            citation_button = gr.Textbox(
-                value=CITATION_BUTTON_TEXT,
-                label=CITATION_BUTTON_LABEL,
-                lines=20,
-                elem_id="citation-button",
-                show_copy_button=True,
-            )
-
-scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=1800)
-scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+                )
+                for selector in [
+                    shown_columns,
+                    filter_columns_type,
+                    filter_columns_precision,
+                    filter_columns_size,
+                    deleted_models_visibility,
+                ]:
+                    selector.change(
+                        update_table,
+                        [
+                            hidden_leaderboard_table_for_search,
+                            shown_columns,
+                            filter_columns_type,
+                            filter_columns_precision,
+                            filter_columns_size,
+                            deleted_models_visibility,
+                            search_bar,
+                        ],
+                        leaderboard_table,
+                        queue=True,
+                    )
+
+            with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
+                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+            # with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
+            #     with gr.Column():
+            #         with gr.Row():
+            #             gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+            #
+            #         with gr.Column():
+            #             with gr.Accordion(
+            #                 f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+            #                 open=False,
+            #             ):
+            #                 with gr.Row():
+            #                     finished_eval_table = gr.components.Dataframe(
+            #                         value=finished_eval_queue_df,
+            #                         headers=EVAL_COLS,
+            #                         datatype=EVAL_TYPES,
+            #                         row_count=5,
+            #                     )
+            #             with gr.Accordion(
+            #                 f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
+            #                 open=False,
+            #             ):
+            #                 with gr.Row():
+            #                     running_eval_table = gr.components.Dataframe(
+            #                         value=running_eval_queue_df,
+            #                         headers=EVAL_COLS,
+            #                         datatype=EVAL_TYPES,
+            #                         row_count=5,
+            #                     )
+            #
+            #             with gr.Accordion(
+            #                 f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+            #                 open=False,
+            #             ):
+            #                 with gr.Row():
+            #                     pending_eval_table = gr.components.Dataframe(
+            #                         value=pending_eval_queue_df,
+            #                         headers=EVAL_COLS,
+            #                         datatype=EVAL_TYPES,
+            #                         row_count=5,
+            #                     )
+            #     with gr.Row():
+            #         gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+            #
+            #     with gr.Row():
+            #         with gr.Column():
+            #             model_name_textbox = gr.Textbox(label="Model name")
+            #             revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+            #             model_type = gr.Dropdown(
+            #                 choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+            #                 label="Model type",
+            #                 multiselect=False,
+            #                 value=None,
+            #                 interactive=True,
+            #             )
+            #
+            #         with gr.Column():
+            #             precision = gr.Dropdown(
+            #                 choices=[i.value.name for i in Precision if i != Precision.Unknown],
+            #                 label="Precision",
+            #                 multiselect=False,
+            #                 value="float16",
+            #                 interactive=True,
+            #             )
+            #             weight_type = gr.Dropdown(
+            #                 choices=[i.value.name for i in WeightType],
+            #                 label="Weights type",
+            #                 multiselect=False,
+            #                 value="Original",
+            #                 interactive=True,
+            #             )
+            #             base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+            #
+            #             submit_button = gr.Button("Submit Eval")
+            #             submission_result = gr.Markdown()
+            #             submit_button.click(
+            #                 add_new_eval,
+            #                 [
+            #                     model_name_textbox,
+            #                     base_model_name_textbox,
+            #                     revision_name_textbox,
+            #                     precision,
+            #                     weight_type,
+            #                     model_type,
+            #                 ],
+            #                 submission_result,
+            #             )
+            #
+            #     with gr.Row():
+            #         with gr.Accordion("📙 Citation", open=False):
+            #             citation_button = gr.Textbox(
+            #                 value=CITATION_BUTTON_TEXT,
+            #                 label=CITATION_BUTTON_LABEL,
+            #                 lines=20,
+            #                 elem_id="citation-button",
+            #                 show_copy_button=True,
+            #             )
+    return app
+
+
+def main():
+    app = build_app()
+    scheduler = BackgroundScheduler()
+    scheduler.add_job(restart_space, "interval", seconds=1800)
+    scheduler.start()
+    app.queue(default_concurrency_limit=40).launch()
+
+
+if __name__ == "__main__":
+    main()
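
Note: app.py now exposes build_app() and main() instead of building the Blocks object at module level. A minimal local smoke test of the new entry point could look like the sketch below; running it from src/encodechka (so the flat imports in app.py resolve) and the share=False flag are assumptions, not part of the commit.

    # Hypothetical local smoke test for the refactored entry point.
    # Assumes the working directory is src/encodechka so `from app import build_app` resolves.
    from app import build_app

    demo = build_app()                                # build the UI without starting the restart scheduler
    demo.queue(default_concurrency_limit=40).launch(  # same queue setting used by main()
        share=False,                                  # assumption: no public share link needed locally
    )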
src/encodechka/display/formatting.py CHANGED
@@ -1,5 +1,8 @@
 def model_hyperlink(link, model_name):
-    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
+    return (
+        f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;'
+        f'text-decoration-style: dotted;">{model_name}</a>'
+    )
 
 
 def make_clickable_model(model_name):
src/encodechka/display/utils.py CHANGED
@@ -2,8 +2,7 @@ from dataclasses import dataclass, make_dataclass
 from enum import Enum
 
 import pandas as pd
-
-from ..about import Tasks
+from about import Tasks
 
 
 def fields(raw_class):
@@ -23,42 +22,38 @@
 
 
 ## Leaderboard columns
-auto_eval_column_dict = []
-# Init
-auto_eval_column_dict.append(
-    [
+auto_eval_column_dict = [
+    (
         "model_type_symbol",
         ColumnContent,
         ColumnContent("T", "str", True, never_hidden=True),
-    ]
-)
-auto_eval_column_dict.append(
-    [
+    ),
+    (
         "model",
         ColumnContent,
         ColumnContent("Model", "markdown", True, never_hidden=True),
-    ]
-)
+    ),
+]
 # Scores
-auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
+auto_eval_column_dict.append(("average", ColumnContent, ColumnContent("Average ⬆️", "number", True)))
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 # Model information
-auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
-auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
-auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
-auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
-auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
+auto_eval_column_dict.append(("model_type", ColumnContent, ColumnContent("Type", "str", False)))
+auto_eval_column_dict.append(("architecture", ColumnContent, ColumnContent("Architecture", "str", False)))
+auto_eval_column_dict.append(("weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)))
+auto_eval_column_dict.append(("precision", ColumnContent, ColumnContent("Precision", "str", False)))
+auto_eval_column_dict.append(("license", ColumnContent, ColumnContent("Hub License", "str", False)))
+auto_eval_column_dict.append(("params", ColumnContent, ColumnContent("#Params (B)", "number", False)))
+auto_eval_column_dict.append(("likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)))
 auto_eval_column_dict.append(
-    [
+    (
         "still_on_hub",
         ColumnContent,
         ColumnContent("Available on the hub", "bool", False),
-    ]
+    )
 )
-auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+auto_eval_column_dict.append(("revision", ColumnContent, ColumnContent("Model sha", "str", False, False)))
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
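
Note: the column list above is consumed by dataclasses.make_dataclass, which treats each (name, type, default) entry as a field whose default value is the ColumnContent instance carrying the display metadata. A self-contained toy sketch of the pattern is below; the simplified ToyColumnContent signature and the use of dataclasses.fields in place of the project's own fields() helper are assumptions for illustration only.

    from dataclasses import dataclass, fields as dc_fields, make_dataclass


    @dataclass(frozen=True)
    class ToyColumnContent:  # simplified stand-in for display.utils.ColumnContent
        name: str
        type: str
        displayed_by_default: bool
        hidden: bool = False
        never_hidden: bool = False


    # Each entry is (attribute name, annotation, default); the default carries the metadata.
    columns = [
        ("model", ToyColumnContent, ToyColumnContent("Model", "markdown", True, never_hidden=True)),
        ("average", ToyColumnContent, ToyColumnContent("Average ⬆️", "number", True)),
        ("license", ToyColumnContent, ToyColumnContent("Hub License", "str", False)),
    ]
    ToyAutoEvalColumn = make_dataclass("ToyAutoEvalColumn", columns, frozen=True)

    # Iterate over the field defaults, e.g. to collect the columns shown by default.
    shown = [f.default.name for f in dc_fields(ToyAutoEvalColumn) if f.default.displayed_by_default]
    print(shown)  # ['Model', 'Average ⬆️']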
src/encodechka/envs.py CHANGED
@@ -6,7 +6,7 @@ from huggingface_hub import HfApi
 # ----------------------------------
 TOKEN = os.environ.get("TOKEN")  # A read/write token for your org
 
-OWNER = "demo-leaderboard-backend"  # Change to your org - don't forget to create a results and request dataset, with the correct format!
+OWNER = "demo-leaderboard-backend"
 # ----------------------------------
 
 REPO_ID = f"{OWNER}/leaderboard"
src/encodechka/leaderboard/read_evals.py CHANGED
@@ -5,10 +5,8 @@ from dataclasses import dataclass
 
 import dateutil
 import numpy as np
-
-from ..display.formatting import make_clickable_model
-from ..display.utils import AutoEvalColumn, ModelType, Precision, Tasks, WeightType
-from ..submission.check_validity import is_model_on_hub
+from display.formatting import make_clickable_model
+from display.utils import AutoEvalColumn, ModelType, Precision, Tasks, WeightType
 
 
 @dataclass
@@ -56,17 +54,17 @@ class EvalResult:
         result_key = f"{org}_{model}_{precision.value.name}"
         full_model = "/".join(org_and_model)
 
-        still_on_hub, _, model_config = is_model_on_hub(
-            full_model,
-            config.get("model_sha", "main"),
-            trust_remote_code=True,
-            test_tokenizer=False,
-        )
-        architecture = "?"
-        if model_config is not None:
-            architectures = getattr(model_config, "architectures", None)
-            if architectures:
-                architecture = ";".join(architectures)
+        # still_on_hub, _, model_config = is_model_on_hub(
+        #     full_model,
+        #     config.get("model_sha", "main"),
+        #     trust_remote_code=True,
+        #     test_tokenizer=False,
+        # )
+        # architecture = "?"
+        # if model_config is not None:
+        #     architectures = getattr(model_config, "architectures", None)
+        #     if architectures:
+        #         architecture = ";".join(architectures)
 
         # Extract results available in this file (some results are split in several files)
         results = {}
@@ -89,8 +87,8 @@
             results=results,
             precision=precision,
             revision=config.get("model_sha", ""),
-            still_on_hub=still_on_hub,
-            architecture=architecture,
+            # still_on_hub=still_on_hub,
+            # architecture=architecture,
         )
 
     def update_with_request_file(self, requests_path):
src/encodechka/populate.py CHANGED
@@ -1,13 +1,16 @@
 import json
 import os
+from typing import Any
 
 import pandas as pd
 from display.formatting import has_no_nan_values, make_clickable_model
 from display.utils import AutoEvalColumn, EvalQueueColumn
-from leaderboard.read_evals import get_raw_eval_results
+from leaderboard.read_evals import EvalResult, get_raw_eval_results
 
 
-def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
+def get_leaderboard_df(
+    results_path: str, requests_path: str, cols: list, benchmark_cols: list
+) -> tuple[list[EvalResult], Any]:
     """Creates a dataframe from all the individual experiment results"""
     raw_data = get_raw_eval_results(results_path, requests_path)
     all_data_json = [v.to_dict() for v in raw_data]
@@ -21,7 +24,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     return raw_data, df
 
 
-def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
+def get_evaluation_queue_df(save_path: str, cols: list) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
     """Creates the different dataframes for the evaluation queues requestes"""
     entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
     all_evals = []
src/encodechka/submission/check_validity.py CHANGED
@@ -34,56 +34,63 @@
 # return True, ""
 #
 #
-# def is_model_on_hub(
-#     model_name: str,
-#     revision: str,
-#     token: str | None = None,
-#     trust_remote_code=False,
-#     test_tokenizer=False,
-# ) -> tuple[bool, str]:
-#     """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
-#     try:
-#         config = AutoConfig.from_pretrained(
-#             model_name,
-#             revision=revision,
-#             trust_remote_code=trust_remote_code,
-#             token=token,
-#         )
-#         if test_tokenizer:
-#             try:
-#                 tk = AutoTokenizer.from_pretrained(
-#                     model_name,
-#                     revision=revision,
-#                     trust_remote_code=trust_remote_code,
-#                     token=token,
-#                 )
-#             except ValueError as e:
-#                 return (
-#                     False,
-#                     f"uses a tokenizer which is not in a transformers release: {e}",
-#                     None,
-#                 )
-#             except Exception:
-#                 return (
-#                     False,
-#                     "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?",
-#                     None,
-#                 )
-#         return True, None, config
-#
-#     except ValueError:
-#         return (
-#             False,
-#             "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
-#             None,
-#         )
-#
-#     except Exception:
-#         return False, "was not found on hub!", None
+def is_model_on_hub(
+    model_name: str,
+    revision: str,
+    token: str | None = None,
+    trust_remote_code=False,
+    test_tokenizer=False,
+) -> tuple[bool, str]:
+    """Checks if the model model_name is on the hub,
+    and whether it (and its tokenizer) can be loaded with AutoClasses."""
+    raise NotImplementedError("Replace with huggingface_hub API")
+    # try:
+    #     config = AutoConfig.from_pretrained(
+    #         model_name,
+    #         revision=revision,
+    #         trust_remote_code=trust_remote_code,
+    #         token=token,
+    #     )
+    #     if test_tokenizer:
+    #         try:
+    #             tk = AutoTokenizer.from_pretrained(
+    #                 model_name,
+    #                 revision=revision,
+    #                 trust_remote_code=trust_remote_code,
+    #                 token=token,
+    #             )
+    #         except ValueError as e:
+    #             return (
+    #                 False,
+    #                 f"uses a tokenizer which is not in a transformers release: {e}",
+    #                 None,
+    #             )
+    #         except Exception:
+    #             return (
+    #                 False,
+    #                 "'s tokenizer cannot be loaded. Is your tokenizer class in a
+    # stable transformers release, and correctly configured?",
+    #                 None,
+    #             )
+    #         return True, None, config
+    #
+    #     except ValueError:
+    #         return (
+    #             False,
+    #             "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow
+    # these models to be automatically submitted to the leaderboard.",
+    #             None,
+    #         )
+    #
+    #     except Exception:
+    #         return False, "was not found on hub!", None
+
+
 #
 #
 # def get_model_size(model_info: ModelInfo, precision: str):
-#     """Gets the model size from the configuration, or the model name if the configuration does not contain the information."""
+#     """Gets the model size from the configuration, or the model name if the
+#     configuration does not contain the information."""
 #     try:
 #         model_size = round(model_info.safetensors["total"] / 1e9, 3)
 #     except (AttributeError, TypeError):
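
Note: is_model_on_hub is left as a stub that raises NotImplementedError("Replace with huggingface_hub API"). One possible shape for that replacement is sketched below; the function name, the specific exceptions handled, and the three-element return mirroring the commented-out transformers version are assumptions, not part of this commit.

    from huggingface_hub import HfApi
    from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError


    def is_model_on_hub_sketch(model_name: str, revision: str, token: str | None = None):
        """Check hub availability via the HTTP API instead of loading AutoConfig/AutoTokenizer."""
        try:
            info = HfApi(token=token).model_info(model_name, revision=revision)
        except (RepositoryNotFoundError, RevisionNotFoundError):
            return False, "was not found on hub!", None
        return True, "", info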
src/encodechka/submission/submit.py CHANGED
@@ -53,7 +53,9 @@
 #         return styled_error(f'Base model "{base_model}" {error}')
 #
 #     if not weight_type == "Adapter":
-#         model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
+#         model_on_hub, error, _ = is_model_on_hub(
+#             model_name=model, revision=revision, token=TOKEN, test_tokenizer=True
+#         )
 #         if not model_on_hub:
 #             return styled_error(f'Model "{model}" {error}')
 #
@@ -118,5 +120,6 @@
 #     os.remove(out_path)
 #
 #     return styled_message(
-#         "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
+#         "Your request has been submitted to the evaluation queue!\n
+# Please wait for up to an hour for the model to show in the PENDING list."
 #     )