Merge pull request #15 from huggingface/fix-app
- app.py  +20 -22
- evaluation.py  +7 -4
app.py  CHANGED

@@ -42,15 +42,9 @@ TASK_TO_ID = {
 TASK_TO_DEFAULT_METRICS = {
     "binary_classification": ["f1", "precision", "recall", "auc", "accuracy"],
     "multi_class_classification": [
-        "f1_macro",
-        "f1_micro",
-        "f1_weighted",
-        "precision_macro",
-        "precision_micro",
-        "precision_weighted",
-        "recall_macro",
-        "recall_micro",
-        "recall_weighted",
+        "f1",
+        "precision",
+        "recall",
         "accuracy",
     ],
     "entity_extraction": ["precision", "recall", "f1", "accuracy"],
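This hunk collapses the nine averaged variants into the three base metric names. In the `evaluate` library the averaging strategy is not part of the metric name; it is passed to `compute` at call time. A minimal sketch (not from this repo) of how the default names resolve, assuming the `evaluate` package is installed:

    # Sketch of how the names in TASK_TO_DEFAULT_METRICS load as metrics.
    import evaluate

    f1 = evaluate.load("f1")

    # For multi-class problems the averaging strategy is a compute-time
    # kwarg, which is why the "*_macro" / "*_micro" / "*_weighted"
    # variants above could be dropped from the defaults.
    score = f1.compute(
        predictions=[0, 1, 2, 2],
        references=[0, 1, 1, 2],
        average="macro",  # or "micro" / "weighted"
    )
    print(score)  # {'f1': ...}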
@@ -72,6 +66,7 @@ def get_supported_metrics():
         except Exception as e:
             print(e)
             print("Skipping the following metric, which cannot load:", metric)
+            continue

         argspec = inspect.getfullargspec(metric_func.compute)
         if "references" in argspec.kwonlyargs and "predictions" in argspec.kwonlyargs:
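The added `continue` is the actual bug fix here: without it, a metric that fails to load still falls through to the `inspect.getfullargspec` call, where `metric_func` is either undefined (first iteration) or stale from the previous one. A simplified sketch of the loop shape, with assumed surrounding code (the real `get_supported_metrics` iterates the Hub's metric list):

    # Simplified sketch of the loop this hunk fixes; the metric list and
    # helper name are assumptions, the try/except/argspec shape is from
    # the diff above.
    import inspect
    import evaluate

    def get_supported_metrics(metric_names):
        supported = []
        for metric in metric_names:
            try:
                metric_func = evaluate.load(metric)
            except Exception as e:
                print(e)
                print("Skipping the following metric, which cannot load:", metric)
                continue  # the fix: skip straight to the next metric

            argspec = inspect.getfullargspec(metric_func.compute)
            if "references" in argspec.kwonlyargs and "predictions" in argspec.kwonlyargs:
                supported.append(metric)
        return supported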
@@ -307,9 +302,7 @@ with st.expander("Advanced configuration"):
             col_mapping[answers_text_col] = "answers.text"
             col_mapping[answers_start_col] = "answers.answer_start"

-with st.form(key="form"):
-
-    compatible_models = get_compatible_models(selected_task, selected_dataset)
+    st.markdown("**Select metrics**")
     st.markdown("The following metrics will be computed")
     html_string = " ".join(
         [
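Moving the form opening below the metrics section follows from how Streamlit forms work: widgets inside `st.form` only deliver their values when the submit button fires, so UI that must react immediately, like the metric list rendered here, has to live outside the form. A small illustration of that behaviour (assumed rationale; the commit does not state it):

    # Illustration of st.form batching, with placeholder option lists.
    import streamlit as st

    # Outside the form: reruns on every interaction, so the page can
    # react to the selection immediately.
    metrics = st.multiselect("Select metrics", ["f1", "precision", "recall"])
    st.markdown(f"The following metrics will be computed: {', '.join(metrics)}")

    # Inside the form: values are only consumed when the button fires.
    with st.form(key="form"):
        models = st.multiselect("Select the models you wish to evaluate", ["model-a", "model-b"])
        submitted = st.form_submit_button("Make submission")

    if submitted:
        st.write("Evaluating", models, "with", metrics)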
@@ -328,26 +321,31 @@ with st.form(key="form"):
     )
     st.info(
         "Note: user-selected metrics will be run with their default arguments from "
-        + "[here](https://github.com/huggingface/
+        + "[here](https://github.com/huggingface/evaluate/tree/main/metrics)"
     )

+with st.form(key="form"):
+
+    compatible_models = get_compatible_models(selected_task, selected_dataset)
+
     selected_models = st.multiselect("Select the models you wish to evaluate", compatible_models)
     print("Selected models:", selected_models)

-    selected_models = filter_evaluated_models(
-        selected_models,
-        selected_task,
-        selected_dataset,
-        selected_config,
-        selected_split,
-    )
-    print("Selected models:", selected_models)
+    if len(selected_models) > 0:
+        selected_models = filter_evaluated_models(
+            selected_models,
+            selected_task,
+            selected_dataset,
+            selected_config,
+            selected_split,
+        )
+        print("Selected models:", selected_models)

     submit_button = st.form_submit_button("Make submission")

     if submit_button:
         if len(selected_models) > 0:
-            project_id = str(uuid.uuid4())
+            project_id = str(uuid.uuid4())
             payload = {
                 "username": AUTOTRAIN_USERNAME,
                 "proj_name": f"eval-project-{project_id}",
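Two behavioural changes ride along with the form move: `filter_evaluated_models` now runs only when at least one model is selected, and each submission still gets a unique `uuid.uuid4()`-based project name. The helper's body is not shown in this diff; below is a hypothetical sketch of what it might do, inferred from the call site above and from `compute_evaluation_id` in evaluation.py further down. The field names and print message are illustrative, not the repo's actual implementation.

    # Hypothetical sketch: drop models whose (task, dataset, config,
    # split, model) combination was already evaluated. Relies on the
    # repo's EvaluationInfo dataclass and get_evaluation_ids() helper;
    # the EvaluationInfo field names here are assumptions.
    def filter_evaluated_models(models, task, dataset, config, split):
        existing_ids = set(get_evaluation_ids())  # hashes of past evaluations
        kept = []
        for model in models:
            info = EvaluationInfo(
                task=task,
                model=model,
                dataset_name=dataset,
                dataset_config=config,
                dataset_split=split,
            )
            if hash(info) in existing_ids:
                print(f"Skipping {model}: already evaluated on this dataset")
            else:
                kept.append(model)
        return kept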
evaluation.py  CHANGED

@@ -15,10 +15,13 @@ class EvaluationInfo:


 def compute_evaluation_id(dataset_info: DatasetInfo) -> int:
-    metadata = dataset_info.cardData["eval_info"]
-    metadata.pop("col_mapping", None)
-    evaluation_info = EvaluationInfo(**metadata)
-    return hash(evaluation_info)
+    if dataset_info.cardData is not None:
+        metadata = dataset_info.cardData["eval_info"]
+        metadata.pop("col_mapping", None)
+        evaluation_info = EvaluationInfo(**metadata)
+        return hash(evaluation_info)
+    else:
+        return None


 def get_evaluation_ids():
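The guard fixes a crash on datasets whose card carries no metadata (`cardData is None`). The surrounding mechanism hashes a dataclass so that identical evaluation setups collapse to the same id; for `hash(evaluation_info)` to work at all, `EvaluationInfo` must be declared hashable. A sketch of that mechanism with illustrative field names (the real class is defined at the top of evaluation.py):

    # Sketch: a frozen dataclass is hashable, so two identical
    # evaluation setups produce the same evaluation id.
    from dataclasses import dataclass

    @dataclass(frozen=True, eq=True)  # frozen + eq make instances hashable
    class EvaluationInfo:
        task: str
        model: str
        dataset_name: str
        dataset_config: str
        dataset_split: str

    a = EvaluationInfo("binary_classification", "bert-base-uncased", "imdb", "plain_text", "test")
    b = EvaluationInfo("binary_classification", "bert-base-uncased", "imdb", "plain_text", "test")
    assert hash(a) == hash(b)  # same setup -> same evaluation id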