Integrate Omar's feedback
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: π
 colorFrom: red
 colorTo: red
 sdk: streamlit
-sdk_version: 1.
+sdk_version: 1.10.0
 app_file: app.py
 ---

app.py
CHANGED
@@ -59,9 +59,12 @@ SUPPORTED_TASKS = list(TASK_TO_ID.keys())
 
 @st.cache
 def get_supported_metrics():
-    metrics = list_metrics()
+    metrics = [metric.id for metric in list_metrics()]
     supported_metrics = []
     for metric in tqdm(metrics):
+        # TODO: this currently requires all metric dependencies to be installed
+        # in the same environment. Refactor to avoid needing to actually load
+        # the metric.
         try:
             metric_func = load(metric)
         except Exception as e:
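For context, a self-contained sketch of the pattern this hunk lands on: probe every metric on the Hub by loading it, and keep only those whose dependencies resolve in the current environment. The imports are assumptions (they live outside the hunk) -- `list_metrics` from `huggingface_hub`, whose `MetricInfo` objects carry the `.id` used above, and `load` from `evaluate`:

```python
from evaluate import load
from huggingface_hub import list_metrics  # assumed source of list_metrics
from tqdm import tqdm


def get_supported_metrics():
    # Every metric on the Hub, by id (e.g. "accuracy", "squad").
    metrics = [metric.id for metric in list_metrics()]
    supported_metrics = []
    for metric in tqdm(metrics):
        # Loading a metric imports its dependencies, so any metric whose
        # dependencies are missing from this environment raises and is
        # skipped -- hence the TODO above about avoiding the load.
        try:
            load(metric)
        except Exception:
            continue
        supported_metrics.append(metric)
    return supported_metrics
```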
@@ -93,14 +96,15 @@ supported_metrics = get_supported_metrics()
 #######
 # APP #
 #######
-st.title("Evaluation
+st.title("Evaluation on the Hub")
 st.markdown(
     """
-    Welcome to Hugging Face's
+    Welcome to Hugging Face's automatic model evaluator! This application allows
     you to evaluate 🤗 Transformers
     [models](https://huggingface.co/models?library=transformers&sort=downloads)
-
-    below. The results of your evaluation will be
+    across a wide variety of datasets on the Hub -- all for free! Please select
+    the dataset and configuration below. The results of your evaluation will be
+    displayed on the [public
     leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards).
     """
 )
@@ -112,7 +116,12 @@ if "dataset" in query_params:
     if len(query_params["dataset"]) > 0 and query_params["dataset"][0] in all_datasets:
         default_dataset = query_params["dataset"][0]
 
-selected_dataset = st.selectbox(
+selected_dataset = st.selectbox(
+    "Select a dataset",
+    all_datasets,
+    index=all_datasets.index(default_dataset),
+    help="Datasets with metadata can be evaluated with 1-click. Check out the [documentation](https://huggingface.co/docs/hub/datasets-cards) to add evaluation metadata to a dataset.",
+)
 st.experimental_set_query_params(**{"dataset": [selected_dataset]})
 
 
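The hunk above also shows why the selectbox and `st.experimental_set_query_params` travel together: the chosen dataset becomes deep-linkable, so `?dataset=emotion` pre-selects it and every new selection rewrites the URL. A minimal sketch of the round trip, assuming Streamlit's experimental query-param API as of the pinned 1.10.0 (the dataset ids are hypothetical):

```python
import streamlit as st

all_datasets = ["emotion", "imdb", "squad"]  # illustrative list only
default_dataset = all_datasets[0]

# Read ?dataset=... from the URL so shared links restore the selection.
query_params = st.experimental_get_query_params()
if "dataset" in query_params and len(query_params["dataset"]) > 0:
    if query_params["dataset"][0] in all_datasets:
        default_dataset = query_params["dataset"][0]

selected_dataset = st.selectbox(
    "Select a dataset", all_datasets, index=all_datasets.index(default_dataset)
)

# Write the choice back so the URL always reflects the current state.
st.experimental_set_query_params(**{"dataset": [selected_dataset]})
```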
@@ -262,9 +271,10 @@ with st.expander("Advanced configuration"):
             col_mapping[target_col] = "target"
 
     elif selected_task == "extractive_question_answering":
-
-
-
+        if metadata is not None:
+            col_mapping = metadata[0]["col_mapping"]
+            # Hub YAML parser converts periods to hyphens, so we remap them here
+            col_mapping = format_col_mapping(col_mapping)
         with col1:
             st.markdown("`context` column")
             st.text("")
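`format_col_mapping` itself is not part of this diff. A hypothetical sketch of what the comment describes -- undoing the Hub YAML parser's period-to-hyphen conversion so nested QA columns come back in dotted form (key names are illustrative assumptions, not the app's actual mapping):

```python
def format_col_mapping(col_mapping: dict) -> dict:
    # e.g. {"answers-text": "answers-text"} -> {"answers.text": "answers.text"}
    return {
        key.replace("-", "."): value.replace("-", ".")
        for key, value in col_mapping.items()
    }
```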
@@ -327,14 +337,18 @@ with st.expander("Advanced configuration"):
         list(set(supported_metrics) - set(TASK_TO_DEFAULT_METRICS[selected_task])),
     )
     st.info(
-        """
+        """Note: user-selected metrics will be run with their default arguments. \
         Check out the [available metrics](https://huggingface.co/metrics) for more details."""
     )
 
 with st.form(key="form"):
 
     compatible_models = get_compatible_models(selected_task, selected_dataset)
-    selected_models = st.multiselect(
+    selected_models = st.multiselect(
+        "Select the models you wish to evaluate",
+        compatible_models,
+        help="Don't see your model in this list? Add the dataset and task it was trained to the [model card metadata.](https://huggingface.co/docs/hub/models-cards#model-card-metadata)",
+    )
     print("Selected models:", selected_models)
 
     if len(selected_models) > 0:
@@ -347,7 +361,7 @@ with st.form(key="form"):
         )
         print("Selected models:", selected_models)
 
-    submit_button = st.form_submit_button("
+    submit_button = st.form_submit_button("Evaluate models")
 
     if submit_button:
         if len(selected_models) > 0:
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
 huggingface-hub<0.8
 python-dotenv
-streamlit==1.
+streamlit==1.10.0
 datasets<2.3
 evaluate<0.2
 # Dataset specific deps
utils.py
CHANGED
@@ -1,7 +1,7 @@
 from typing import Dict, Union
 
 import requests
-from huggingface_hub import HfApi, ModelFilter
+from huggingface_hub import HfApi, ModelFilter, dataset_info
 
 AUTOTRAIN_TASK_TO_HUB_TASK = {
     "binary_classification": "text-classification",
@@ -55,9 +55,9 @@ def http_get(path: str, domain: str, token: str = None, params: dict = None) ->
 
 
 def get_metadata(dataset_name: str) -> Union[Dict, None]:
-    data =
-    if data
-        return data
+    data = dataset_info(dataset_name)
+    if data.cardData is not None and "train-eval-index" in data.cardData.keys():
+        return data.cardData["train-eval-index"]
     else:
         return None
 
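With this change, `get_metadata` reads the `train-eval-index` block straight from the dataset card via `huggingface_hub.dataset_info`. A hedged usage sketch -- the dataset id and field values are illustrative, though the field names follow the Hub's `train-eval-index` schema:

```python
from utils import get_metadata

metadata = get_metadata("some-dataset")  # hypothetical dataset id
# A card that declares train-eval-index yields something shaped like:
# [
#     {
#         "config": "plain_text",
#         "task": "text-classification",
#         "task_id": "binary_classification",
#         "splits": {"train_split": "train", "eval_split": "test"},
#         "col_mapping": {"text": "text", "label": "target"},
#     }
# ]
# app.py then reads metadata[0]["col_mapping"] to pre-fill the column mapping;
# datasets whose cards lack the block return None.
```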