Integrate Omar's feedback
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: π
 colorFrom: red
 colorTo: red
 sdk: streamlit
-sdk_version: 1.
+sdk_version: 1.10.0
 app_file: app.py
 ---

app.py
CHANGED
@@ -59,9 +59,12 @@ SUPPORTED_TASKS = list(TASK_TO_ID.keys())
 
 @st.cache
 def get_supported_metrics():
-    metrics = list_metrics()
+    metrics = [metric.id for metric in list_metrics()]
     supported_metrics = []
     for metric in tqdm(metrics):
+        # TODO: this currently requires all metric dependencies to be installed
+        # in the same environment. Refactor to avoid needing to actually load
+        # the metric.
         try:
             metric_func = load(metric)
         except Exception as e:
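For context, a self-contained sketch of the pattern this hunk lands on: probe every metric on the Hub by loading it, and keep only those whose dependencies resolve in the current environment. The imports are assumptions (they live outside the hunk) -- `list_metrics` from `huggingface_hub`, whose `MetricInfo` objects carry the `.id` used above, and `load` from `evaluate`:

```python
from evaluate import load
from huggingface_hub import list_metrics  # assumed source of list_metrics
from tqdm import tqdm


def get_supported_metrics():
    # Every metric on the Hub, by id (e.g. "accuracy", "squad").
    metrics = [metric.id for metric in list_metrics()]
    supported_metrics = []
    for metric in tqdm(metrics):
        # Loading a metric imports its dependencies, so any metric whose
        # dependencies are missing from this environment raises and is
        # skipped -- hence the TODO above about avoiding the load.
        try:
            load(metric)
        except Exception:
            continue
        supported_metrics.append(metric)
    return supported_metrics
```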
@@ -93,14 +96,15 @@ supported_metrics = get_supported_metrics()
 #######
 # APP #
 #######
-st.title("Evaluation
+st.title("Evaluation on the Hub")
 st.markdown(
     """
-    Welcome to Hugging Face's
+    Welcome to Hugging Face's automatic model evaluator! This application allows
     you to evaluate 🤗 Transformers
     [models](https://huggingface.co/models?library=transformers&sort=downloads)
-
-    below. The results of your evaluation will be
+    across a wide variety of datasets on the Hub -- all for free! Please select
+    the dataset and configuration below. The results of your evaluation will be
+    displayed on the [public
     leaderboard](https://huggingface.co/spaces/autoevaluate/leaderboards).
     """
 )
@@ -112,7 +116,12 @@ if "dataset" in query_params:
     if len(query_params["dataset"]) > 0 and query_params["dataset"][0] in all_datasets:
         default_dataset = query_params["dataset"][0]
 
-selected_dataset = st.selectbox(
+selected_dataset = st.selectbox(
+    "Select a dataset",
+    all_datasets,
+    index=all_datasets.index(default_dataset),
+    help="Datasets with metadata can be evaluated with 1-click. Check out the [documentation](https://huggingface.co/docs/hub/datasets-cards) to add evaluation metadata to a dataset.",
+)
 st.experimental_set_query_params(**{"dataset": [selected_dataset]})
 
 
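The hunk above also shows why the selectbox and `st.experimental_set_query_params` travel together: the chosen dataset becomes deep-linkable, so `?dataset=emotion` pre-selects it and every new selection rewrites the URL. A minimal sketch of the round trip, assuming Streamlit's experimental query-param API as of the pinned 1.10.0 (the dataset ids are hypothetical):

```python
import streamlit as st

all_datasets = ["emotion", "imdb", "squad"]  # illustrative list only
default_dataset = all_datasets[0]

# Read ?dataset=... from the URL so shared links restore the selection.
query_params = st.experimental_get_query_params()
if "dataset" in query_params and len(query_params["dataset"]) > 0:
    if query_params["dataset"][0] in all_datasets:
        default_dataset = query_params["dataset"][0]

selected_dataset = st.selectbox(
    "Select a dataset", all_datasets, index=all_datasets.index(default_dataset)
)

# Write the choice back so the URL always reflects the current state.
st.experimental_set_query_params(**{"dataset": [selected_dataset]})
```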
@@ -262,9 +271,10 @@ with st.expander("Advanced configuration"):
             col_mapping[target_col] = "target"
 
     elif selected_task == "extractive_question_answering":
-
-
-
+        if metadata is not None:
+            col_mapping = metadata[0]["col_mapping"]
+            # Hub YAML parser converts periods to hyphens, so we remap them here
+            col_mapping = format_col_mapping(col_mapping)
         with col1:
             st.markdown("`context` column")
             st.text("")
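`format_col_mapping` itself is not part of this diff. A hypothetical sketch of what the comment describes -- undoing the Hub YAML parser's period-to-hyphen conversion so nested QA columns come back in dotted form (key names are illustrative assumptions, not the app's actual mapping):

```python
def format_col_mapping(col_mapping: dict) -> dict:
    # e.g. {"answers-text": "answers-text"} -> {"answers.text": "answers.text"}
    return {
        key.replace("-", "."): value.replace("-", ".")
        for key, value in col_mapping.items()
    }
```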
@@ -327,14 +337,18 @@ with st.expander("Advanced configuration"):
         list(set(supported_metrics) - set(TASK_TO_DEFAULT_METRICS[selected_task])),
     )
     st.info(
-        """
+        """Note: user-selected metrics will be run with their default arguments. \
         Check out the [available metrics](https://huggingface.co/metrics) for more details."""
     )
 
 with st.form(key="form"):
 
     compatible_models = get_compatible_models(selected_task, selected_dataset)
-    selected_models = st.multiselect(
+    selected_models = st.multiselect(
+        "Select the models you wish to evaluate",
+        compatible_models,
+        help="Don't see your model in this list? Add the dataset and task it was trained to the [model card metadata.](https://huggingface.co/docs/hub/models-cards#model-card-metadata)",
+    )
     print("Selected models:", selected_models)
 
     if len(selected_models) > 0:
@@ -347,7 +361,7 @@ with st.form(key="form"):
         )
         print("Selected models:", selected_models)
 
-    submit_button = st.form_submit_button("
+    submit_button = st.form_submit_button("Evaluate models")
 
     if submit_button:
         if len(selected_models) > 0:
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
 huggingface-hub<0.8
 python-dotenv
-streamlit==1.
+streamlit==1.10.0
 datasets<2.3
 evaluate<0.2
 # Dataset specific deps
utils.py
CHANGED
@@ -1,7 +1,7 @@
 from typing import Dict, Union
 
 import requests
-from huggingface_hub import HfApi, ModelFilter
+from huggingface_hub import HfApi, ModelFilter, dataset_info
 
 AUTOTRAIN_TASK_TO_HUB_TASK = {
     "binary_classification": "text-classification",
@@ -55,9 +55,9 @@ def http_get(path: str, domain: str, token: str = None, params: dict = None) ->
 
 
 def get_metadata(dataset_name: str) -> Union[Dict, None]:
-    data =
-    if data
-        return data
+    data = dataset_info(dataset_name)
+    if data.cardData is not None and "train-eval-index" in data.cardData.keys():
+        return data.cardData["train-eval-index"]
     else:
         return None
 
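With this change, `get_metadata` reads the `train-eval-index` block straight from the dataset card via `huggingface_hub.dataset_info`. A hedged usage sketch -- the dataset id and field values are illustrative, though the field names follow the Hub's `train-eval-index` schema:

```python
from utils import get_metadata

metadata = get_metadata("some-dataset")  # hypothetical dataset id
# A card that declares train-eval-index yields something shaped like:
# [
#     {
#         "config": "plain_text",
#         "task": "text-classification",
#         "task_id": "binary_classification",
#         "splits": {"train_split": "train", "eval_split": "test"},
#         "col_mapping": {"text": "text", "label": "target"},
#     }
# ]
# app.py then reads metadata[0]["col_mapping"] to pre-fill the column mapping;
# datasets whose cards lack the block return None.
```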