Spaces:

autoevaluate
/

model-evaluator

Runtime error

lewtun HF staff committed on Jun 15, 2022

Commit

7b53738

•

2 Parent(s): ecefcd1 dd366f1

Merge pull request #20 from huggingface/add-qa-linking

Files changed (4) hide show

README.md CHANGED Viewed

@@ -23,5 +23,5 @@ The table below shows which tasks are currently supported for evaluation in the
 | `multi_label_classification`    |     ❌     |                                                                                     |
 | `entity_extraction`             |     ✅     | [`eval-staging-838`](https://huggingface.co/datasets/autoevaluate/eval-staging-838) |
 | `extractive_question_answering` |     ✅     |                                                                                     |
-| `translation`                   |     ❌     |                                                                                     |
-| `summarization`                 |     ❌     |                                                                                     |

 | `multi_label_classification`    |     ❌     |                                                                                     |
 | `entity_extraction`             |     ✅     | [`eval-staging-838`](https://huggingface.co/datasets/autoevaluate/eval-staging-838) |
 | `extractive_question_answering` |     ✅     |                                                                                     |
+| `translation`                   |     ✅     |                                                                                     |
+| `summarization`                 |     ✅     |                                                                                     |

app.py CHANGED Viewed

@@ -67,6 +67,7 @@ def get_supported_metrics():
         # in the same environment. Refactor to avoid needing to actually load
         # the metric.
         try:
             metric_func = load(metric)
         except Exception as e:
             print(e)
@@ -103,7 +104,7 @@ st.markdown(
     Welcome to Hugging Face's automatic model evaluator! This application allows
     you to evaluate 🤗 Transformers
     [models](https://huggingface.co/models?library=transformers&sort=downloads)
-    across a wide variety of datasets on the Hub -- all for free! Please select
     the dataset and configuration below. The results of your evaluation will be
     displayed on the [public
     leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards).
@@ -345,8 +346,7 @@ with st.expander("Advanced configuration"):
     )
 with st.form(key="form"):
-    compatible_models = get_compatible_models(selected_task, selected_dataset)
     selected_models = st.multiselect(
         "Select the models you wish to evaluate",
         compatible_models,

         # in the same environment. Refactor to avoid needing to actually load
         # the metric.
         try:
+            print(f"INFO -- Attempting to load metric: {metric}")
             metric_func = load(metric)
         except Exception as e:
             print(e)
     Welcome to Hugging Face's automatic model evaluator! This application allows
     you to evaluate 🤗 Transformers
     [models](https://huggingface.co/models?library=transformers&sort=downloads)
+    across a wide variety of datasets on the Hub. Please select
     the dataset and configuration below. The results of your evaluation will be
     displayed on the [public
     leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards).
     )
 with st.form(key="form"):
+    compatible_models = get_compatible_models(selected_task, [selected_dataset])
     selected_models = st.multiselect(
         "Select the models you wish to evaluate",
         compatible_models,

evaluation.py CHANGED Viewed

@@ -43,7 +43,7 @@ def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_
         )
         candidate_id = hash(evaluation_info)
         if candidate_id in evaluation_ids:
-            st.info(f"Model {model} has already been evaluated on this configuration. Skipping evaluation...")
             models.pop(idx)
     return models

         )
         candidate_id = hash(evaluation_info)
         if candidate_id in evaluation_ids:
+            st.info(f"Model `{model}` has already been evaluated on this configuration. Skipping evaluation...")
             models.pop(idx)
     return models

utils.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Dict, Union
 import jsonlines
 import requests
@@ -19,9 +19,6 @@ HUB_TASK_TO_AUTOTRAIN_TASK = {v: k for k, v in AUTOTRAIN_TASK_TO_HUB_TASK.items(
 LOGS_REPO = "evaluation-job-logs"
-api = HfApi()
 def get_auth_headers(token: str, prefix: str = "autonlp"):
     return {"Authorization": f"{prefix} {token}"}
@@ -65,15 +62,32 @@ def get_metadata(dataset_name: str) -> Union[Dict, None]:
         return None
-def get_compatible_models(task, dataset_name):
-    # TODO: relax filter on PyTorch models once supported in AutoTrain
-    filt = ModelFilter(
-        task=AUTOTRAIN_TASK_TO_HUB_TASK[task],
-        trained_dataset=dataset_name,
-        library=["transformers", "pytorch"],
-    )
-    compatible_models = api.list_models(filter=filt)
-    return sorted([model.modelId for model in compatible_models])
 def get_key(col_mapping, val):

+from typing import Dict, List, Union
 import jsonlines
 import requests
 LOGS_REPO = "evaluation-job-logs"
 def get_auth_headers(token: str, prefix: str = "autonlp"):
     return {"Authorization": f"{prefix} {token}"}
         return None
+def get_compatible_models(task: str, dataset_ids: List[str]) -> List[str]:
+    """
+    Returns all model IDs that are compatible with the given task and dataset names.
+    Args:
+        task (`str`): The task to search for.
+        dataset_names (`List[str]`): A list of dataset names to search for.
+    Returns:
+        A list of model IDs, sorted alphabetically.
+    """
+    compatible_models = []
+    # Include models trained on SQuAD datasets, since these can be evaluated on
+    # other SQuAD-like datasets
+    if task == "extractive_question_answering":
+        dataset_ids.extend(["squad", "squad_v2"])
+    # TODO: relax filter on PyTorch models if TensorFlow supported in AutoTrain
+    for dataset_id in dataset_ids:
+        model_filter = ModelFilter(
+            task=AUTOTRAIN_TASK_TO_HUB_TASK[task],
+            trained_dataset=dataset_id,
+            library=["transformers", "pytorch"],
+        )
+        compatible_models.extend(HfApi().list_models(filter=model_filter))
+    return set(sorted([model.modelId for model in compatible_models]))
 def get_key(col_mapping, val):