Add connection to AutoTrain
- app.py +36 -20
- requirements.txt +2 -1
- utils.py +37 -0
app.py
CHANGED
@@ -1,41 +1,57 @@
-import ...
-from ...
-
-api = HfApi()
+import os
+from pathlib import Path
 
+import streamlit as st
+from dotenv import load_dotenv
 
-def get_metadata(dataset_name):
-    filt = DatasetFilter(dataset_name=dataset_name)
-    data = api.list_datasets(filter=filt, full=True)
-    return data[0].cardData["train-eval-index"]
+from utils import get_compatible_models, get_metadata, http_post
 
+if Path(".env").is_file():
+    load_dotenv(".env")
 
-def get_compatible_models(task, dataset_name):
-    filt = ModelFilter(task=task, trained_dataset=dataset_name)
-    compatible_models = api.list_models(filter=filt)
-    return [model.modelId for model in compatible_models]
+HF_TOKEN = os.getenv("HF_TOKEN")
+AUTOTRAIN_USERNAME = os.getenv("AUTOTRAIN_USERNAME")
+AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API")
 
 
 with st.form(key="form"):
 
-    dataset_name = st.selectbox("Select a dataset to evaluate on", ["lewtun/...
+    dataset_name = st.selectbox("Select a dataset to evaluate on", ["lewtun/autoevaluate__emotion"])
 
+    # TODO: remove this step once we select real datasets
+    # Strip out original dataset name
+    original_dataset_name = dataset_name.split("/")[-1].split("__")[-1]
+
+    # In general this will be a list of multiple configs => need to generalise logic here
     metadata = get_metadata(dataset_name)
-    # st.write(metadata)
 
     dataset_config = st.selectbox("Select the subset to evaluate on", [metadata[0]["config"]])
 
     splits = metadata[0]["splits"]
+    split_names = list(splits.values())
+    eval_split = splits.get("eval_split", split_names[0])
 
-    ...
-    ...
-    evaluation_split = st.selectbox("Select the split to evaluate on", [v for d in splits for k, v in d.items()])
+    selected_split = st.selectbox("Select the split to evaluate on", split_names, index=split_names.index(eval_split))
 
-    compatible_models = get_compatible_models(metadata[0]["task"], ...
+    compatible_models = get_compatible_models(metadata[0]["task"], original_dataset_name)
 
-    ...
+    selected_models = st.multiselect("Select the models you wish to evaluate", compatible_models, compatible_models[0])
 
     submit_button = st.form_submit_button("Make Submission")
 
 if submit_button:
-    ...
+    for model in selected_models:
+        payload = {
+            "username": AUTOTRAIN_USERNAME,
+            "task": 1,
+            "model": model,
+            "col_mapping": {"sentence": "text", "label": "target"},
+            "split": selected_split,
+            "dataset": original_dataset_name,
+            "config": dataset_config,
+        }
+        json_resp = http_post(
+            path="/evaluate/create", payload=payload, token=HF_TOKEN, domain=AUTOTRAIN_BACKEND_API
+        ).json()
+
+        st.success(f"✅ Successfully submitted model {model} for evaluation with job ID {json_resp['id']}")
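For context on the new split-selection logic: it assumes the train-eval-index metadata exposes "splits" as a mapping from split roles to split names, whereas the removed code iterated over a list of dicts. A hypothetical sketch of the shape app.py now expects; the field values here are illustrative placeholders, not taken from the actual dataset card:

# Hypothetical `train-eval-index` entry; values are placeholders for illustration.
metadata = [
    {
        "config": "default",
        "task": "text-classification",
        "splits": {"train_split": "train", "eval_split": "test"},
    }
]

splits = metadata[0]["splits"]
split_names = list(splits.values())                    # ["train", "test"]
eval_split = splits.get("eval_split", split_names[0])  # "test"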
requirements.txt
CHANGED
@@ -1 +1,2 @@
-huggingface-hub==0.4.0
+huggingface-hub==0.4.0
+python-dotenv
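The new python-dotenv pin backs the .env loading added to app.py. A minimal sketch of how the three variables are expected to be supplied locally; the variable names come from app.py, the values are placeholders:

# Hypothetical .env file (placeholder values, not from the PR):
#   HF_TOKEN=hf_xxx
#   AUTOTRAIN_USERNAME=your-username
#   AUTOTRAIN_BACKEND_API=https://<autotrain-backend-host>
import os

from dotenv import load_dotenv

load_dotenv(".env")  # populates os.environ; no-op if the file is missing
print(os.getenv("AUTOTRAIN_USERNAME"))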
utils.py
ADDED
@@ -0,0 +1,37 @@
+import requests
+from huggingface_hub import DatasetFilter, HfApi, ModelFilter
+
+api = HfApi()
+
+
+def get_auth_headers(token: str, prefix: str = "autonlp"):
+    return {"Authorization": f"{prefix} {token}"}
+
+
+def http_post(
+    path: str,
+    token: str,
+    payload=None,
+    domain: str = None,
+) -> requests.Response:
+    """HTTP POST request to the AutoNLP API, raises UnreachableAPIError if the API cannot be reached"""
+    try:
+        response = requests.post(
+            url=domain + path, json=payload, headers=get_auth_headers(token=token), allow_redirects=True
+        )
+    except requests.exceptions.ConnectionError:
+        print("❌ Failed to reach AutoNLP API, check your internet connection")
+    response.raise_for_status()
+    return response
+
+
+def get_metadata(dataset_name):
+    filt = DatasetFilter(dataset_name=dataset_name)
+    data = api.list_datasets(filter=filt, full=True)
+    return data[0].cardData["train-eval-index"]
+
+
+def get_compatible_models(task, dataset_name):
+    filt = ModelFilter(task=task, trained_dataset=dataset_name)
+    compatible_models = api.list_models(filter=filt)
+    return [model.modelId for model in compatible_models]
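One caveat in http_post: if requests.post raises a ConnectionError, response is never bound, so the subsequent response.raise_for_status() fails with a NameError rather than a clean error (and the UnreachableAPIError named in the docstring is never actually raised). A possible hardening, sketched here rather than being part of this PR, is to re-raise after logging:

import requests


def http_post(path: str, token: str, payload=None, domain: str = None) -> requests.Response:
    """POST to the AutoNLP API; re-raises if the API cannot be reached."""
    try:
        response = requests.post(
            url=domain + path,
            json=payload,
            headers={"Authorization": f"autonlp {token}"},
            allow_redirects=True,
        )
    except requests.exceptions.ConnectionError:
        print("❌ Failed to reach AutoNLP API, check your internet connection")
        raise  # avoid a NameError from the unbound `response` below
    response.raise_for_status()
    return response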