resolved conflict
- .github/workflows/sync_with_spaces.yml +2 -1
- app.py +21 -23
.github/workflows/sync_with_spaces.yml
CHANGED
@@ -16,4 +16,5 @@ jobs:
       - name: Push to hub
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
-        run:
+        run: |
+          git push https://lewtun:[email protected]/spaces/autoevaluate/autoevaluate main
app.py
CHANGED
@@ -41,12 +41,12 @@ TASK_TO_DEFAULT_METRICS = {
     "summarization": ["rouge1", "rouge2", "rougeL", "rougeLsum", "gen_len"],
 }

-
+SUPPORTED_TASKS = list(TASK_TO_ID.keys())

 @st.cache
 def get_supported_metrics():
     metrics = list_metrics()
-    supported_metrics =
+    supported_metrics = []
     for metric in tqdm(metrics):
         try:
             metric_func = load_metric(metric)
@@ -71,7 +71,7 @@ def get_supported_metrics():
                 break

         if defaults:
-            supported_metrics
+            supported_metrics.append(metric)
     return supported_metrics

 supported_metrics = get_supported_metrics()
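For context on this hunk: `get_supported_metrics()` is wrapped in `@st.cache`, so the slow scan over `list_metrics()` runs once and later reruns of the script reuse the cached result, which after this change is a flat list of metric names. A minimal sketch of the pattern, separate from the app itself (the metric names below are placeholders, not the real Hub list):

import streamlit as st

@st.cache
def get_supported_metrics():
    # Placeholder for the real probe over list_metrics();
    # the function now collects a flat list of metric names.
    return ["accuracy", "bleu", "rouge", "squad"]

supported_metrics = get_supported_metrics()  # recomputed only when the cache is cleared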
@@ -102,7 +102,6 @@ selected_dataset = st.selectbox("Select a dataset", all_datasets, index=all_data
 st.experimental_set_query_params(**{"dataset": [selected_dataset]})


-# TODO: In general this will be a list of multiple configs => need to generalise logic here
 metadata = get_metadata(selected_dataset)
 if metadata is None:
     st.warning("No evaluation metadata found. Please configure the evaluation job below.")
@@ -111,8 +110,8 @@ with st.expander("Advanced configuration"):
     ## Select task
     selected_task = st.selectbox(
         "Select a task",
-
-        index=
+        SUPPORTED_TASKS,
+        index=SUPPORTED_TASKS.index(metadata[0]["task_id"]) if metadata is not None else 0,
     )
     ### Select config
     configs = get_dataset_config_names(selected_dataset)
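To make the new default-index logic concrete: the selectbox opens on the task recorded in the dataset's evaluation metadata when such metadata exists, and on the first supported task otherwise. A small illustration (the task list and metadata entry below are hypothetical stand-ins, not the app's real `TASK_TO_ID` contents):

SUPPORTED_TASKS = ["binary_classification", "summarization", "extractive_question_answering"]
metadata = [{"task_id": "extractive_question_answering"}]  # hypothetical metadata entry

default_index = SUPPORTED_TASKS.index(metadata[0]["task_id"]) if metadata is not None else 0
print(default_index)  # 2 -> the selectbox pre-selects the task from the metadata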
@@ -136,7 +135,7 @@ with st.expander("Advanced configuration"):
     ## Select columns
     rows_resp = http_get(
         path="/rows",
-        domain=
+        domain=DATASETS_PREVIEW_API,
         params={"dataset": selected_dataset, "config": selected_config, "split": selected_split},
     ).json()
     col_names = list(pd.json_normalize(rows_resp["rows"][0]["row"]).columns)
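`http_get` and `DATASETS_PREVIEW_API` are defined elsewhere in app.py and are not part of this diff; judging from the call site, the helper presumably issues a plain GET against the datasets-preview backend. A sketch under that assumption (the helper body, the URL value, and the sample dataset parameters here are guesses for illustration only):

import requests

DATASETS_PREVIEW_API = "https://datasets-preview.huggingface.tech"  # assumed value, not from the diff

def http_get(path, domain, params=None):
    # Assumed behaviour: GET domain + path with the given query parameters
    return requests.get(domain + path, params=params)

rows_resp = http_get(
    path="/rows",
    domain=DATASETS_PREVIEW_API,
    params={"dataset": "squad", "config": "plain_text", "split": "validation"},
).json()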
@@ -236,6 +235,9 @@ with st.expander("Advanced configuration"):
         col_mapping[target_col] = "target"

     elif selected_task == "extractive_question_answering":
+        col_mapping = metadata[0]["col_mapping"]
+        # Hub YAML parser converts periods to hyphens, so we remap them here
+        col_mapping = {k.replace("-", "."): v.replace("-", ".") for k, v in col_mapping.items()}
         with col1:
             st.markdown("`context` column")
             st.text("")
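The three added lines pull the column mapping out of the dataset's evaluation metadata and undo the Hub YAML parser's period-to-hyphen conversion. A small illustration of the dict comprehension (the sample mapping is hypothetical):

col_mapping = {"context": "context", "question": "question",
               "answers-text": "answers-text", "answers-answer_start": "answers-answer_start"}

# Hub YAML parser converts periods to hyphens, so remap them back
col_mapping = {k.replace("-", "."): v.replace("-", ".") for k, v in col_mapping.items()}

print(col_mapping["answers.text"])          # "answers.text"
print(col_mapping["answers.answer_start"])  # "answers.answer_start"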
@@ -257,26 +259,22 @@ with st.expander("Advanced configuration"):
         context_col = st.selectbox(
             "This column should contain the question's context",
             col_names,
-            index=col_names.index(get_key(
+            index=col_names.index(get_key(col_mapping, "context")) if metadata is not None else 0,
         )
         question_col = st.selectbox(
             "This column should contain the question to be answered, given the context",
             col_names,
-            index=col_names.index(get_key(
+            index=col_names.index(get_key(col_mapping, "question")) if metadata is not None else 0,
         )
         answers_text_col = st.selectbox(
             "This column should contain example answers to the question, extracted from the context",
             col_names,
-            index=col_names.index(get_key(
-            if metadata is not None
-            else 0,
+            index=col_names.index(get_key(col_mapping, "answers.text")) if metadata is not None else 0,
         )
         answers_start_col = st.selectbox(
             "This column should contain the indices in the context of the first character of each answers.text",
             col_names,
-            index=col_names.index(get_key(
-            if metadata is not None
-            else 0,
+            index=col_names.index(get_key(col_mapping, "answers.answer_start")) if metadata is not None else 0,
         )
         col_mapping[context_col] = "context"
         col_mapping[question_col] = "question"
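The new `index=` defaults all go through a `get_key` helper that is defined elsewhere in app.py and is not shown in this diff; from its usage it looks like a reverse lookup that returns the dataset column mapped to a given task column. A minimal sketch under that assumption (the helper body and sample data are illustrative, not the repo's actual code):

def get_key(mapping, value):
    # Assumed behaviour: return the first key whose value equals `value`
    return next(k for k, v in mapping.items() if v == value)

col_mapping = {"question": "question", "answers.text": "answers.text"}  # hypothetical
col_names = ["id", "title", "context", "question", "answers.text"]      # hypothetical

index = col_names.index(get_key(col_mapping, "answers.text"))
print(index)  # 4 -> pre-selects the matching column in the selectbox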
@@ -287,19 +285,19 @@ with st.form(key="form"):

     compatible_models = get_compatible_models(selected_task, selected_dataset)
     st.markdown("The following metrics will be computed")
-    html_string = " ".join([
+    html_string = " ".join([
+        "<div style=\"padding-right:5px;padding-left:5px;padding-top:5px;padding-bottom:5px;float:left\">"
+        + "<div style=\"background-color:#D3D3D3;border-radius:5px;display:inline-block;padding-right:5px;padding-left:5px;color:white\">"
+        + metric + "</div></div>" for metric in TASK_TO_DEFAULT_METRICS[selected_task]
+    ])
     st.markdown(html_string, unsafe_allow_html=True)
     selected_metrics = st.multiselect(
         "(Optional) Select additional metrics",
-        list(set(supported_metrics
+        list(set(supported_metrics) - set(TASK_TO_DEFAULT_METRICS[selected_task])),
     )
-
-    argument_string = ", ".join(["-".join(key, value) for key, value in supported_metrics[metric].items()])
-    st.info(f"Note! The arguments for {metric_name} are: {argument_string}")
+    st.info("Note: user-selected metrics will be run with their default arguments from [here](https://github.com/huggingface/datasets/tree/master/metrics)")
     selected_models = st.multiselect("Select the models you wish to evaluate", compatible_models)
-    print("Selected models:", selected_models)
     submit_button = st.form_submit_button("Make submission")
-
     if submit_button:
         project_id = str(uuid.uuid4())[:3]
         payload = {
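For the additional-metrics multiselect, the new argument removes the task's default metrics from the full list of supported ones so they are not offered twice. A worked example using the summarization defaults from line 41 (the supported list here is illustrative):

supported_metrics = ["accuracy", "bleu", "rouge1", "rougeL", "squad"]  # illustrative
defaults = ["rouge1", "rouge2", "rougeL", "rougeLsum", "gen_len"]      # summarization defaults

extra_choices = list(set(supported_metrics) - set(defaults))
print(sorted(extra_choices))  # ['accuracy', 'bleu', 'squad']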
@@ -355,7 +353,7 @@ with st.form(key="form"):
             f"""
             Evaluation takes appoximately 1 hour to complete, so grab a ☕ or 🍵 while you wait:

-            * π Click [here](https://huggingface.co/spaces/
+            * π Click [here](https://huggingface.co/spaces/autoevaluate/leaderboards) to view the results from your submission
             """
         )
     else: