Spaces:

autoevaluate
/

model-evaluator

Runtime error

App Files Files Community

lewtun HF staff committed on Apr 29, 2022

Commit

7a3b085

•

2 Parent(s): a22b4e9 3207694

Merge pull request #5 from huggingface/use-dataset-backend

Browse files

Files changed (4) hide show

.gitignore +2 -0
app.py +204 -65
requirements.txt +1 -1
utils.py +38 -10

.gitignore CHANGED Viewed

@@ -127,3 +127,5 @@ dmypy.json
 # Pyre type checker
 .pyre/

 # Pyre type checker
 .pyre/
+scratch/

app.py CHANGED Viewed

@@ -1,10 +1,14 @@
 import os
 from pathlib import Path
 import streamlit as st
 from dotenv import load_dotenv
-from utils import get_compatible_models, get_metadata, http_post
 if Path(".env").is_file():
     load_dotenv(".env")
@@ -12,22 +16,19 @@ if Path(".env").is_file():
 HF_TOKEN = os.getenv("HF_TOKEN")
 AUTOTRAIN_USERNAME = os.getenv("AUTOTRAIN_USERNAME")
 AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API")
 TASK_TO_ID = {
     "binary_classification": 1,
     "multi_class_classification": 2,
-    "multi_label_classification": 3,
     "entity_extraction": 4,
     "extractive_question_answering": 5,
     "translation": 6,
     "summarization": 8,
-    "single_column_regression": 10,
 }
-# TODO: remove this hardcorded logic and accept any dataset on the Hub
-DATASETS_TO_EVALUATE = ["emotion", "conll2003", "imdb", "squad", "xsum", "ncbi_disease", "go_emotions"]
 ###########
 ### APP ###
 ###########
@@ -42,90 +43,228 @@ st.markdown(
     """
 )
-selectable_datasets = [f"lewtun/autoevaluate__{dset}" for dset in DATASETS_TO_EVALUATE]
 query_params = st.experimental_get_query_params()
-default_dataset = selectable_datasets[0]
 if "dataset" in query_params:
-    if len(query_params["dataset"]) > 0 and query_params["dataset"][0] in selectable_datasets:
         default_dataset = query_params["dataset"][0]
-dataset_name = st.selectbox(
-    "Select a dataset",
-    selectable_datasets,
-    index=selectable_datasets.index(default_dataset)
-)
-st.experimental_set_query_params(**{"dataset": [dataset]})
-# TODO: remove this step once we select real datasets
-# Strip out original dataset name
-original_dataset_name = dataset_name.split("/")[-1].split("__")[-1]
-# In general this will be a list of multiple configs => need to generalise logic here
-metadata = get_metadata(dataset_name)
 with st.expander("Advanced configuration"):
-    dataset_config = st.selectbox("Select a config", [metadata[0]["config"]])
-    splits = metadata[0]["splits"]
-    split_names = list(splits.values())
-    eval_split = splits.get("eval_split", split_names[0])
-    selected_split = st.selectbox("Select a split", split_names, index=split_names.index(eval_split))
     # TODO: add a function to handle the mapping task <--> column mapping
-    col_mapping = metadata[0]["col_mapping"]
-    col_names = list(col_mapping.keys())
-    # TODO: figure out how to get all dataset column names (i.e. features) without download dataset itself
     st.markdown("**Map your data columns**")
     col1, col2 = st.columns(2)
     # TODO: find a better way to layout these items
-    # TODO: propagate this information to payload
-    with col1:
-        st.markdown("`text` column")
-        st.text("")
-        st.text("")
-        st.text("")
-        st.text("")
-        st.markdown("`target` column")
-    with col2:
-        st.selectbox("This column should contain the text you want to classify", col_names, index=0)
-        st.selectbox("This column should contain the labels you want to assign to the text", col_names, index=1)
-with st.form(key="form"):
-    compatible_models = get_compatible_models(metadata[0]["task"], original_dataset_name)
-    selected_models = st.multiselect("Select the models you wish to evaluate", compatible_models, compatible_models[0])
     submit_button = st.form_submit_button("Make submission")
     if submit_button:
-        for model in selected_models:
             payload = {
-                "username": AUTOTRAIN_USERNAME,
-                "task": TASK_TO_ID[metadata[0]["task_id"]],
-                "model": model,
-                "col_mapping": metadata[0]["col_mapping"],
-                "split": selected_split,
-                "dataset": original_dataset_name,
-                "config": dataset_config,
             }
-            json_resp = http_post(
-                path="/evaluate/create", payload=payload, token=HF_TOKEN, domain=AUTOTRAIN_BACKEND_API
             ).json()
-            if json_resp["status"] == 1:
-                st.success(f"✅ Successfully submitted model {model} for evaluation with job ID {json_resp['id']}")
-                st.markdown(
-                    f"""
-                Evaluation takes appoximately 1 hour to complete, so grab a ☕ or 🍵 while you wait:
-                * 📊 Click [here](https://huggingface.co/spaces/autoevaluate/leaderboards) to view the results from your submission
-                """
-                )
-            else:
-                st.error("🙈 Oh noes, there was an error submitting your submission!")

 import os
+import uuid
 from pathlib import Path
+import pandas as pd
 import streamlit as st
+from datasets import get_dataset_config_names
 from dotenv import load_dotenv
+from huggingface_hub import list_datasets
+from utils import get_compatible_models, get_metadata, http_get, http_post
 if Path(".env").is_file():
     load_dotenv(".env")
 HF_TOKEN = os.getenv("HF_TOKEN")
 AUTOTRAIN_USERNAME = os.getenv("AUTOTRAIN_USERNAME")
 AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API")
+DATASETS_PREVIEW_API = os.getenv("DATASETS_PREVIEW_API")
 TASK_TO_ID = {
     "binary_classification": 1,
     "multi_class_classification": 2,
+    # "multi_label_classification": 3, # Not fully supported in AutoTrain
     "entity_extraction": 4,
     "extractive_question_answering": 5,
     "translation": 6,
     "summarization": 8,
 }
 ###########
 ### APP ###
 ###########
     """
 )
+all_datasets = [d.id for d in list_datasets()]
 query_params = st.experimental_get_query_params()
+default_dataset = all_datasets[0]
 if "dataset" in query_params:
+    if len(query_params["dataset"]) > 0 and query_params["dataset"][0] in all_datasets:
         default_dataset = query_params["dataset"][0]
+selected_dataset = st.selectbox("Select a dataset", all_datasets, index=all_datasets.index(default_dataset))
+st.experimental_set_query_params(**{"dataset": [selected_dataset]})
+# TODO: In general this will be a list of multiple configs => need to generalise logic here
+metadata = get_metadata(selected_dataset)
+if metadata is None:
+    st.warning("No evaluation metadata found. Please configure the evaluation job below.")
 with st.expander("Advanced configuration"):
+    ## Select task
+    selected_task = st.selectbox("Select a task", list(TASK_TO_ID.keys()))
+    ### Select config
+    configs = get_dataset_config_names(selected_dataset)
+    selected_config = st.selectbox("Select a config", configs)
+    ## Select splits
+    splits_resp = http_get(path="/splits", domain=DATASETS_PREVIEW_API, params={"dataset": selected_dataset})
+    if splits_resp.status_code == 200:
+        split_names = []
+        all_splits = splits_resp.json()
+        for split in all_splits["splits"]:
+            if split["config"] == selected_config:
+                split_names.append(split["split"])
+        selected_split = st.selectbox("Select a split", split_names)  # , index=split_names.index(eval_split))
+    ## Show columns
+    rows_resp = http_get(
+        path="/rows",
+        domain="https://datasets-preview.huggingface.tech",
+        params={"dataset": selected_dataset, "config": selected_config, "split": selected_split},
+    ).json()
+    col_names = list(pd.json_normalize(rows_resp["rows"][0]["row"]).columns)
+    # splits = metadata[0]["splits"]
+    # split_names = list(splits.values())
+    # eval_split = splits.get("eval_split", split_names[0])
+    # selected_split = st.selectbox("Select a split", split_names, index=split_names.index(eval_split))
     # TODO: add a function to handle the mapping task <--> column mapping
+    # col_mapping = metadata[0]["col_mapping"]
+    # col_names = list(col_mapping.keys())
     st.markdown("**Map your data columns**")
     col1, col2 = st.columns(2)
     # TODO: find a better way to layout these items
+    col_mapping = {}
+    if selected_task in ["binary_classification", "multi_class_classification"]:
+        with col1:
+            st.markdown("`text` column")
+            st.text("")
+            st.text("")
+            st.text("")
+            st.text("")
+            st.markdown("`target` column")
+        with col2:
+            text_col = st.selectbox("This column should contain the text you want to classify", col_names)
+            target_col = st.selectbox(
+                "This column should contain the labels you want to assign to the text", col_names
+            )
+            col_mapping[text_col] = "text"
+            col_mapping[target_col] = "target"
+    elif selected_task == "entity_extraction":
+        with col1:
+            st.markdown("`tokens` column")
+            st.text("")
+            st.text("")
+            st.text("")
+            st.text("")
+            st.markdown("`tags` column")
+        with col2:
+            tokens_col = st.selectbox(
+                "This column should contain the parts of the text (as an array of tokens) you want to assign labels to",
+                col_names,
+            )
+            tags_col = st.selectbox(
+                "This column should contain the labels to associate to each part of the text", col_names
+            )
+            col_mapping[tokens_col] = "tokens"
+            col_mapping[tags_col] = "tags"
+    elif selected_task == "translation":
+        with col1:
+            st.markdown("`source` column")
+            st.text("")
+            st.text("")
+            st.text("")
+            st.text("")
+            st.markdown("`target` column")
+        with col2:
+            text_col = st.selectbox("This column should contain the text you want to translate", col_names)
+            target_col = st.selectbox(
+                "This column should contain an example translation of the source text", col_names
+            )
+            col_mapping[text_col] = "source"
+            col_mapping[target_col] = "target"
+    elif selected_task == "summarization":
+        with col1:
+            st.markdown("`text` column")
+            st.text("")
+            st.text("")
+            st.text("")
+            st.text("")
+            st.markdown("`target` column")
+        with col2:
+            text_col = st.selectbox("This column should contain the text you want to summarize", col_names)
+            target_col = st.selectbox("This column should contain an example summarization of the text", col_names)
+            col_mapping[text_col] = "text"
+            col_mapping[target_col] = "target"
+    elif selected_task == "extractive_question_answering":
+        with col1:
+            st.markdown("`context` column")
+            st.text("")
+            st.text("")
+            st.text("")
+            st.text("")
+            st.markdown("`question` column")
+            st.text("")
+            st.text("")
+            st.text("")
+            st.text("")
+            st.markdown("`answers.text` column")
+            st.text("")
+            st.text("")
+            st.text("")
+            st.text("")
+            st.markdown("`answers.answer_start` column")
+        with col2:
+            context_col = st.selectbox("This column should contain the question's context", col_names)
+            question_col = st.selectbox(
+                "This column should contain the question to be answered, given the context", col_names
+            )
+            answers_text_col = st.selectbox(
+                "This column should contain example answers to the question, extracted from the context", col_names
+            )
+            answers_start_col = st.selectbox(
+                "This column should contain the indices in the context of the first character of each answers.text",
+                col_names,
+            )
+            col_mapping[context_col] = "context"
+            col_mapping[question_col] = "question"
+            col_mapping[answers_text_col] = "answers.text"
+            col_mapping[answers_start_col] = "answers.answer_start"
+with st.form(key="form"):
+    compatible_models = get_compatible_models(selected_task, selected_dataset)
+    selected_models = st.multiselect(
+        "Select the models you wish to evaluate", compatible_models
+    )
     submit_button = st.form_submit_button("Make submission")
     if submit_button:
+        project_id = str(uuid.uuid4())[:3]
+        payload = {
+            "username": AUTOTRAIN_USERNAME,
+            "proj_name": f"my-eval-project-{project_id}",
+            "task": TASK_TO_ID[selected_task],
+            "config": {
+                "language": "en",
+                "max_models": 5,
+                "instance": {
+                    "provider": "aws",
+                    "instance_type": "ml.g4dn.4xlarge",
+                    "max_runtime_seconds": 172800,
+                    "num_instances": 1,
+                    "disk_size_gb": 150,
+                },
+                "evaluation": {
+                    "metrics": [],
+                    "models": selected_models,
+                },
+            },
+        }
+        print(f"Payload: {payload}")
+        project_json_resp = http_post(
+            path="/projects/create", payload=payload, token=HF_TOKEN, domain=AUTOTRAIN_BACKEND_API
+        ).json()
+        print(project_json_resp)
+        if project_json_resp["created"]:
             payload = {
+                "split": 4,  # use "auto" split choice in AutoTrain
+                "col_mapping": col_mapping,
+                "load_config": {"max_size_bytes": 0, "shuffle": False},
             }
+            data_json_resp = http_post(
+                path=f"/projects/{project_json_resp['id']}/data/{selected_dataset}",
+                payload=payload,
+                token=HF_TOKEN,
+                domain=AUTOTRAIN_BACKEND_API,
+                params={"type": "dataset", "config_name": selected_config, "split_name": selected_split},
             ).json()
+            print(data_json_resp)
+            if data_json_resp["download_status"] == 1:
+                train_json_resp = http_get(
+                    path=f"/projects/{project_json_resp['id']}/data/start_process",
+                    token=HF_TOKEN,
+                    domain=AUTOTRAIN_BACKEND_API,
+                ).json()
+                print(train_json_resp)
+                if train_json_resp["success"]:
+                    st.success(f"✅ Successfully submitted evaluation job with project ID {project_id}")
+                    st.markdown(
+                        f"""
+                    Evaluation takes appoximately 1 hour to complete, so grab a ☕ or 🍵 while you wait:
+                    * 📊 Click [here](https://huggingface.co/spaces/huggingface/leaderboards) to view the results from your submission
+                    """
+                    )
+                else:
+                    st.error("🙈 Oh noes, there was an error submitting your submission!")

requirements.txt CHANGED Viewed

@@ -1,3 +1,3 @@
 huggingface-hub==0.4.0
 python-dotenv
-streamlit

 huggingface-hub==0.4.0
 python-dotenv
+streamlit==1.2.0

utils.py CHANGED Viewed

@@ -1,6 +1,21 @@
 import requests
 from huggingface_hub import DatasetFilter, HfApi, ModelFilter
 api = HfApi()
@@ -8,16 +23,23 @@ def get_auth_headers(token: str, prefix: str = "autonlp"):
     return {"Authorization": f"{prefix} {token}"}
-def http_post(
-    path: str,
-    token: str,
-    payload=None,
-    domain: str = None,
-) -> requests.Response:
     """HTTP POST request to the AutoNLP API, raises UnreachableAPIError if the API cannot be reached"""
     try:
         response = requests.post(
-            url=domain + path, json=payload, headers=get_auth_headers(token=token), allow_redirects=True
         )
     except requests.exceptions.ConnectionError:
         print("❌ Failed to reach AutoNLP API, check your internet connection")
@@ -25,13 +47,19 @@ def http_post(
     return response
-def get_metadata(dataset_name):
     filt = DatasetFilter(dataset_name=dataset_name)
     data = api.list_datasets(filter=filt, full=True)
-    return data[0].cardData["train-eval-index"]
 def get_compatible_models(task, dataset_name):
-    filt = ModelFilter(task=task, trained_dataset=dataset_name, library="transformers")
     compatible_models = api.list_models(filter=filt)
     return [model.modelId for model in compatible_models]

+from typing import Dict, Union
 import requests
 from huggingface_hub import DatasetFilter, HfApi, ModelFilter
+AUTOTRAIN_TASK_TO_HUB_TASK = {
+    "binary_classification": "text-classification",
+    "multi_class_classification": "text-classification",
+    # "multi_label_classification": "text-classification", # Not fully supported in AutoTrain
+    "entity_extraction": "token-classification",
+    "extractive_question_answering": "question-answering",
+    "translation": "translation",
+    "summarization": "summarization",
+    # "single_column_regression": 10,
+}
+HUB_TASK_TO_AUTOTRAIN_TASK = {v: k for k, v in AUTOTRAIN_TASK_TO_HUB_TASK.items()}
 api = HfApi()
     return {"Authorization": f"{prefix} {token}"}
+def http_post(path: str, token: str, payload=None, domain: str = None, params=None) -> requests.Response:
     """HTTP POST request to the AutoNLP API, raises UnreachableAPIError if the API cannot be reached"""
     try:
         response = requests.post(
+            url=domain + path, json=payload, headers=get_auth_headers(token=token), allow_redirects=True, params=params
+        )
+    except requests.exceptions.ConnectionError:
+        print("❌ Failed to reach AutoNLP API, check your internet connection")
+    response.raise_for_status()
+    return response
+def http_get(path: str, domain: str, token: str = None, params: dict = None) -> requests.Response:
+    """HTTP GET request to the AutoNLP API, raises UnreachableAPIError if the API cannot be reached"""
+    try:
+        response = requests.get(
+            url=domain + path, headers=get_auth_headers(token=token), allow_redirects=True, params=params
         )
     except requests.exceptions.ConnectionError:
         print("❌ Failed to reach AutoNLP API, check your internet connection")
     return response
+def get_metadata(dataset_name: str) -> Union[Dict, None]:
     filt = DatasetFilter(dataset_name=dataset_name)
     data = api.list_datasets(filter=filt, full=True)
+    if data[0].cardData is not None and "train-eval-index" in data[0].cardData.keys():
+        return data[0].cardData["train-eval-index"]
+    else:
+        return None
 def get_compatible_models(task, dataset_name):
+    # TODO: relax filter on PyTorch models once supported in AutoTrain
+    filt = ModelFilter(
+        task=AUTOTRAIN_TASK_TO_HUB_TASK[task], trained_dataset=dataset_name, library=["transformers", "pytorch"]
+    )
     compatible_models = api.list_models(filter=filt)
     return [model.modelId for model in compatible_models]