Spaces:

autoevaluate
/

model-evaluator

Runtime error

App Files Files Community

lewtun HF staff commited on Jun 24, 2022

Commit

7ec3a4a

•

2 Parent(s): dcc9d39 0b46237

Merge pull request #28 from huggingface/flush-jobs

Browse files

Files changed (2) hide show

app.py +25 -3
notebooks/flush-prediction-repos.ipynb +177 -0

app.py CHANGED Viewed

@@ -4,12 +4,14 @@ from pathlib import Path
 import pandas as pd
 import streamlit as st
 from datasets import get_dataset_config_names
 from dotenv import load_dotenv
 from huggingface_hub import list_datasets
 from evaluation import filter_evaluated_models
 from utils import (
     commit_evaluation_log,
     format_col_mapping,
     get_compatible_models,
@@ -146,9 +148,8 @@ selected_dataset = st.selectbox(
     "Select a dataset",
     all_datasets,
     index=all_datasets.index(default_dataset),
-    help="""Datasets with metadata can be evaluated with 1-click. Check out the \
-        [documentation](https://huggingface.co/docs/hub/datasets-cards) to add \
-        evaluation metadata to a dataset.""",
 )
 st.experimental_set_query_params(**{"dataset": [selected_dataset]})
@@ -495,6 +496,18 @@ with st.form(key="form"):
                     ).json()
                     print(f"INFO -- AutoTrain job response: {train_json_resp}")
                     if train_json_resp["success"]:
                         st.success("✅ Successfully submitted evaluation job!")
                         st.markdown(
                             f"""
@@ -506,6 +519,15 @@ with st.form(key="form"):
                                 Check your email for notifications.
                         * 📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) \
                             to view the results from your submission once the Hub pull request is merged.
                         """
                         )
                         print("INFO -- Pushing evaluation job logs to the Hub")

 import pandas as pd
 import streamlit as st
+import yaml
 from datasets import get_dataset_config_names
 from dotenv import load_dotenv
 from huggingface_hub import list_datasets
 from evaluation import filter_evaluated_models
 from utils import (
+    AUTOTRAIN_TASK_TO_HUB_TASK,
     commit_evaluation_log,
     format_col_mapping,
     get_compatible_models,
     "Select a dataset",
     all_datasets,
     index=all_datasets.index(default_dataset),
+    help="""Datasets with metadata can be evaluated with 1-click. Configure an evaluation job to add \
+        new metadata to a dataset card.""",
 )
 st.experimental_set_query_params(**{"dataset": [selected_dataset]})
                     ).json()
                     print(f"INFO -- AutoTrain job response: {train_json_resp}")
                     if train_json_resp["success"]:
+                        train_eval_index = {
+                            "train-eval-index": [
+                                {
+                                    "config": selected_config,
+                                    "task": AUTOTRAIN_TASK_TO_HUB_TASK[selected_task],
+                                    "task_id": selected_task,
+                                    "splits": {"eval_split": selected_split},
+                                    "col_mapping": col_mapping,
+                                }
+                            ]
+                        }
+                        selected_metadata = yaml.dump(train_eval_index, sort_keys=False)
                         st.success("✅ Successfully submitted evaluation job!")
                         st.markdown(
                             f"""
                                 Check your email for notifications.
                         * 📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) \
                             to view the results from your submission once the Hub pull request is merged.
+                        * Add the following metadata to the \
+                            [dataset card](https://huggingface.co/datasets/{selected_dataset}/blob/main/README.md) \
+                                to enable 1-click evaluations:
+                        """
+                        )
+                        st.markdown(
+                            f"""
+                        ```yaml
+                        {selected_metadata}
                         """
                         )
                         print("INFO -- Pushing evaluation job logs to the Hub")

notebooks/flush-prediction-repos.ipynb ADDED Viewed

	@@ -0,0 +1,177 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "c8093b9e-ca6a-423d-96c3-5fe21f7109a1",
+   "metadata": {},
+   "source": [
+    "## Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "efe8cda7-a687-4867-b1f0-8efbcd428681",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from pathlib import Path\n",
+    "\n",
+    "from dotenv import load_dotenv\n",
+    "from huggingface_hub import DatasetFilter, delete_repo, list_datasets\n",
+    "from tqdm.auto import tqdm\n",
+    "\n",
+    "if Path(\".env\").is_file():\n",
+    "    load_dotenv(\".env\")\n",
+    "\n",
+    "HF_TOKEN = os.getenv(\"HF_TOKEN\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8f6e01f0-b658-451f-999c-e08d9f4bbbd3",
+   "metadata": {},
+   "source": [
+    "## Get all prediction repos from autoevaluate org"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "2e369478-66d3-498d-a8fd-95bc9180f362",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_prediction_repos():\n",
+    "    all_repos = list_datasets(author=\"autoevaluate\")\n",
+    "    prediction_repos = [\n",
+    "        repo for repo in all_repos if repo.id.split(\"/\")[1].startswith(\"autoeval-\")\n",
+    "    ]\n",
+    "    return prediction_repos"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "542db019-d01f-42f5-bef4-888dae8eeadb",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "66"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "prediction_repos = get_prediction_repos()\n",
+    "len(prediction_repos)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "331cfabf-4b73-490f-8d6a-86b5bc162666",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "DatasetInfo: {\n",
+       "\tid: autoevaluate/autoeval-staging-eval-project-9dcc51b5-6464670\n",
+       "\tsha: d3bb02be592d167f7a217ac9341d187142d9a90a\n",
+       "\tlastModified: 2022-06-13T14:54:34.000Z\n",
+       "\ttags: ['type:predictions', 'tags:autotrain', 'tags:evaluation', 'datasets:glue']\n",
+       "\tprivate: False\n",
+       "\tauthor: autoevaluate\n",
+       "\tdescription: None\n",
+       "\tcitation: None\n",
+       "\tcardData: None\n",
+       "\tsiblings: None\n",
+       "\tgated: False\n",
+       "\tdownloads: 12\n",
+       "}"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "prediction_repos[0]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "57a86b69-ffe8-4035-8f3d-5c917d8ce7bf",
+   "metadata": {},
+   "source": [
+    "## Delete all prediction repos"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "6c8e23e7-2a6d-437b-9742-17f37684d9eb",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "06fa304dcc6d44e39205b20a5e488052",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/66 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "for repo in tqdm(prediction_repos):\n",
+    "    delete_repo(\n",
+    "        repo_id=repo.id,\n",
+    "        repo_type=\"dataset\",\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7d64b0aa-d05f-4497-9bd2-eb2fc0d8bd7a",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "autoevaluate",
+   "language": "python",
+   "name": "autoevaluate"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}