Add persistent storage for logging
- .gitignore +4 -1
- app.py +23 -15
- requirements.txt +1 -0
- utils.py +29 -1
.gitignore
CHANGED
@@ -128,4 +128,7 @@ dmypy.json
 # Pyre type checker
 .pyre/
 
-scratch/
+scratch/
+
+# Evaluation job logs
+evaluation-job-logs/
app.py
CHANGED
@@ -13,6 +13,7 @@ from tqdm import tqdm
 
 from evaluation import filter_evaluated_models
 from utils import (
+    commit_evaluation_log,
     format_col_mapping,
     get_compatible_models,
     get_key,
@@ -69,7 +70,7 @@ def get_supported_metrics():
             metric_func = load(metric)
         except Exception as e:
             print(e)
-            print("Skipping the following metric, which cannot load:", metric)
+            print("WARNING -- Skipping the following metric, which cannot load:", metric)
             continue
 
         argspec = inspect.getfullargspec(metric_func.compute)
@@ -128,7 +129,7 @@ st.experimental_set_query_params(**{"dataset": [selected_dataset]})
 
 
 metadata = get_metadata(selected_dataset)
-print(metadata)
+print(f"INFO -- Dataset metadata: {metadata}")
 if metadata is None:
     st.warning("No evaluation metadata found. Please configure the evaluation job below.")
 
@@ -352,7 +353,7 @@ with st.form(key="form"):
         help="""Don't see your model in this list? Add the dataset and task it was trained to the \
             [model card metadata.](https://huggingface.co/docs/hub/models-cards#model-card-metadata)""",
     )
-    print("Selected models:", selected_models)
+    print("INFO -- Selected models before filter:", selected_models)
 
     if len(selected_models) > 0:
         selected_models = filter_evaluated_models(
@@ -362,14 +363,14 @@ with st.form(key="form"):
             selected_config,
             selected_split,
         )
-        print("Selected models:", selected_models)
+        print("INFO -- Selected models after filter:", selected_models)
 
-    submit_button = st.form_submit_button("Evaluate models")
+    submit_button = st.form_submit_button("Evaluate models 🚀")
 
     if submit_button:
         if len(selected_models) > 0:
             project_id = str(uuid.uuid4())[:8]
-            payload = {
+            project_payload = {
                 "username": AUTOTRAIN_USERNAME,
                 "proj_name": f"eval-project-{project_id}",
                 "task": TASK_TO_ID[selected_task],
@@ -391,24 +392,24 @@ with st.form(key="form"):
                     },
                 },
             }
-            print(f"Payload: {payload}")
+            print(f"INFO -- Payload: {project_payload}")
             project_json_resp = http_post(
                 path="/projects/create",
-                payload=payload,
+                payload=project_payload,
                 token=HF_TOKEN,
                 domain=AUTOTRAIN_BACKEND_API,
             ).json()
-            print(project_json_resp)
+            print(f"INFO -- Project creation response: {project_json_resp}")
 
             if project_json_resp["created"]:
-                payload = {
+                data_payload = {
                     "split": 4,  # use "auto" split choice in AutoTrain
                     "col_mapping": col_mapping,
                     "load_config": {"max_size_bytes": 0, "shuffle": False},
                 }
                 data_json_resp = http_post(
                     path=f"/projects/{project_json_resp['id']}/data/{selected_dataset}",
-                    payload=payload,
+                    payload=data_payload,
                     token=HF_TOKEN,
                     domain=AUTOTRAIN_BACKEND_API,
                     params={
@@ -417,24 +418,31 @@ with st.form(key="form"):
                         "split_name": selected_split,
                     },
                 ).json()
-                print(data_json_resp)
+                print(f"INFO -- Dataset creation response: {data_json_resp}")
                 if data_json_resp["download_status"] == 1:
                     train_json_resp = http_get(
                         path=f"/projects/{project_json_resp['id']}/data/start_process",
                         token=HF_TOKEN,
                         domain=AUTOTRAIN_BACKEND_API,
                     ).json()
-                    print(train_json_resp)
+                    print(f"INFO -- AutoTrain job response: {train_json_resp}")
                     if train_json_resp["success"]:
-                        st.success(f"✅ Successfully submitted evaluation job with project …
+                        st.success(f"✅ Successfully submitted evaluation job with project name {project_id}")
                         st.markdown(
                             f"""
-                            Evaluation …
+                            Evaluation can take up to 1 hour to complete, so grab a ☕ or 🍵 while you wait:
 
                             📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) \
                                 to view the results from your submission
                             """
                         )
+                        print("INFO -- Pushing evaluation job logs to the Hub")
+                        evaluation_log = {}
+                        evaluation_log["payload"] = project_payload
+                        evaluation_log["project_creation_response"] = project_json_resp
+                        evaluation_log["dataset_creation_response"] = data_json_resp
+                        evaluation_log["autotrain_job_response"] = train_json_resp
+                        commit_evaluation_log(evaluation_log, hf_access_token=HF_TOKEN)
                     else:
                         st.error("🙈 Oh no, there was an error submitting your evaluation job!")
                 else:
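For reference, a single entry in the logs dataset would look roughly like the sketch below. Only the four top-level keys come from the evaluation_log built in app.py above; every nested value is a hypothetical stand-in for a real AutoTrain API response.

# Illustrative only -- the keys mirror evaluation_log in app.py; the values are made up.
example_log_record = {
    "payload": {"username": "<autotrain-username>", "proj_name": "eval-project-1a2b3c4d", "task": 1},
    "project_creation_response": {"created": True, "id": 123},
    "dataset_creation_response": {"download_status": 1},
    "autotrain_job_response": {"success": True},
}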
requirements.txt
CHANGED
@@ -3,6 +3,7 @@ python-dotenv
 streamlit==1.10.0
 datasets<2.3
 evaluate<0.2
+jsonlines
 # Dataset specific deps
 py7zr<0.19
 openpyxl<3.1
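The new jsonlines dependency is what commit_evaluation_log (added to utils.py below) relies on for reading and rewriting logs.jsonl. A minimal sketch of that read-append-rewrite pattern, with a placeholder file path and a made-up record:

import jsonlines

# Read every existing record, append the new one, then rewrite the whole file.
records = []
with jsonlines.open("logs.jsonl") as reader:  # placeholder path
    for obj in reader:
        records.append(obj)

records.append({"payload": {"proj_name": "eval-project-demo"}})  # hypothetical record

with jsonlines.open("logs.jsonl", mode="w") as writer:
    for record in records:
        writer.write(record)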
utils.py
CHANGED
@@ -1,7 +1,8 @@
 from typing import Dict, Union
 
+import jsonlines
 import requests
-from huggingface_hub import HfApi, ModelFilter, dataset_info
+from huggingface_hub import HfApi, ModelFilter, Repository, dataset_info
 
 AUTOTRAIN_TASK_TO_HUB_TASK = {
     "binary_classification": "text-classification",
@@ -15,6 +16,8 @@ AUTOTRAIN_TASK_TO_HUB_TASK = {
 }
 
 HUB_TASK_TO_AUTOTRAIN_TASK = {v: k for k, v in AUTOTRAIN_TASK_TO_HUB_TASK.items()}
+LOGS_REPO = "evaluation-job-logs"
+
 
 api = HfApi()
 
@@ -86,3 +89,28 @@ def format_col_mapping(col_mapping: dict) -> dict:
             col_mapping[f"answers.{k}"] = f"answers.{v}"
         del col_mapping["answers"]
     return col_mapping
+
+
+def commit_evaluation_log(evaluation_log, hf_access_token=None):
+    logs_repo_url = f"https://huggingface.co/datasets/autoevaluate/{LOGS_REPO}"
+    logs_repo = Repository(
+        local_dir=LOGS_REPO,
+        clone_from=logs_repo_url,
+        repo_type="dataset",
+        private=True,
+        use_auth_token=hf_access_token,
+    )
+    logs_repo.git_pull()
+    with jsonlines.open(f"{LOGS_REPO}/logs.jsonl") as r:
+        lines = []
+        for obj in r:
+            lines.append(obj)
+
+    lines.append(evaluation_log)
+    with jsonlines.open(f"{LOGS_REPO}/logs.jsonl", mode="w") as writer:
+        for job in lines:
+            writer.write(job)
+    logs_repo.push_to_hub(
+        commit_message=f"Evaluation submitted with project name {evaluation_log['payload']['proj_name']}"
+    )
+    print("INFO -- Pushed evaluation logs to the Hub")
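A hedged usage sketch for the new helper: only the nested "proj_name" key is strictly required, because the function uses it to build the commit message; the token and the remaining fields are placeholders.

from utils import commit_evaluation_log

# Minimal illustrative call -- clones/pulls the logs dataset, appends this record
# to logs.jsonl, and pushes the commit back to the Hub.
evaluation_log = {
    "payload": {"proj_name": "eval-project-1a2b3c4d"},  # used in the commit message
    "project_creation_response": {"created": True},
    "dataset_creation_response": {"download_status": 1},
    "autotrain_job_response": {"success": True},
}
commit_evaluation_log(evaluation_log, hf_access_token="hf_xxx")  # placeholder token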