lewtun committed
Commit e502d68 • 1 Parent(s): 39d9487

Add persistent storage for logging

Files changed (4):
  1. .gitignore +4 -1
  2. app.py +23 -15
  3. requirements.txt +1 -0
  4. utils.py +29 -1
.gitignore CHANGED
@@ -128,4 +128,7 @@ dmypy.json
 # Pyre type checker
 .pyre/
 
-scratch/
+scratch/
+
+# Evaluation job logs
+evaluation-job-logs/
app.py CHANGED
@@ -13,6 +13,7 @@ from tqdm import tqdm
 
 from evaluation import filter_evaluated_models
 from utils import (
+    commit_evaluation_log,
     format_col_mapping,
     get_compatible_models,
     get_key,
@@ -69,7 +70,7 @@ def get_supported_metrics():
             metric_func = load(metric)
         except Exception as e:
             print(e)
-            print("Skipping the following metric, which cannot load:", metric)
+            print("WARNING -- Skipping the following metric, which cannot load:", metric)
             continue
 
         argspec = inspect.getfullargspec(metric_func.compute)
@@ -128,7 +129,7 @@ st.experimental_set_query_params(**{"dataset": [selected_dataset]})
 
 
 metadata = get_metadata(selected_dataset)
-print(metadata)
+print(f"INFO -- Dataset metadata: {metadata}")
 if metadata is None:
     st.warning("No evaluation metadata found. Please configure the evaluation job below.")
 
@@ -352,7 +353,7 @@ with st.form(key="form"):
         help="""Don't see your model in this list? Add the dataset and task it was trained to the \
            [model card metadata.](https://huggingface.co/docs/hub/models-cards#model-card-metadata)""",
     )
-    print("Selected models:", selected_models)
+    print("INFO -- Selected models before filter:", selected_models)
 
     if len(selected_models) > 0:
         selected_models = filter_evaluated_models(
@@ -362,14 +363,14 @@ with st.form(key="form"):
             selected_config,
             selected_split,
         )
-        print("Selected models:", selected_models)
+        print("INFO -- Selected models after filter:", selected_models)
 
-    submit_button = st.form_submit_button("Evaluate models")
+    submit_button = st.form_submit_button("Evaluate models 🚀")
 
     if submit_button:
         if len(selected_models) > 0:
             project_id = str(uuid.uuid4())[:8]
-            payload = {
+            project_payload = {
                 "username": AUTOTRAIN_USERNAME,
                 "proj_name": f"eval-project-{project_id}",
                 "task": TASK_TO_ID[selected_task],
@@ -391,24 +392,24 @@ with st.form(key="form"):
                     },
                 },
             }
-            print(f"Payload: {payload}")
+            print(f"INFO -- Payload: {project_payload}")
             project_json_resp = http_post(
                 path="/projects/create",
-                payload=payload,
+                payload=project_payload,
                 token=HF_TOKEN,
                 domain=AUTOTRAIN_BACKEND_API,
             ).json()
-            print(project_json_resp)
+            print(f"INFO -- Project creation response: {project_json_resp}")
 
             if project_json_resp["created"]:
-                payload = {
+                data_payload = {
                     "split": 4,  # use "auto" split choice in AutoTrain
                     "col_mapping": col_mapping,
                     "load_config": {"max_size_bytes": 0, "shuffle": False},
                 }
                 data_json_resp = http_post(
                     path=f"/projects/{project_json_resp['id']}/data/{selected_dataset}",
-                    payload=payload,
+                    payload=data_payload,
                     token=HF_TOKEN,
                     domain=AUTOTRAIN_BACKEND_API,
                     params={
@@ -417,24 +418,31 @@ with st.form(key="form"):
                         "split_name": selected_split,
                     },
                 ).json()
-                print(data_json_resp)
+                print(f"INFO -- Dataset creation response: {data_json_resp}")
                 if data_json_resp["download_status"] == 1:
                     train_json_resp = http_get(
                         path=f"/projects/{project_json_resp['id']}/data/start_process",
                         token=HF_TOKEN,
                         domain=AUTOTRAIN_BACKEND_API,
                     ).json()
-                    print(train_json_resp)
+                    print(f"INFO -- AutoTrain job response: {train_json_resp}")
                     if train_json_resp["success"]:
-                        st.success(f"✅ Successfully submitted evaluation job with project ID {project_id}")
+                        st.success(f"✅ Successfully submitted evaluation job with project name {project_id}")
                         st.markdown(
                             f"""
-                            Evaluation takes appoximately 1 hour to complete, so grab a ☕ or 🍵 while you wait:
+                            Evaluation can take up to 1 hour to complete, so grab a ☕ or 🍵 while you wait:
 
                             📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) \
                             to view the results from your submission
                             """
                         )
+                        print("INFO -- Pushing evaluation job logs to the Hub")
+                        evaluation_log = {}
+                        evaluation_log["payload"] = project_payload
+                        evaluation_log["project_creation_response"] = project_json_resp
+                        evaluation_log["dataset_creation_response"] = data_json_resp
+                        evaluation_log["autotrain_job_response"] = train_json_resp
+                        commit_evaluation_log(evaluation_log, hf_access_token=HF_TOKEN)
                     else:
                         st.error("🙈 Oh no, there was an error submitting your evaluation job!")
                 else:
requirements.txt CHANGED
@@ -3,6 +3,7 @@ python-dotenv
 streamlit==1.10.0
 datasets<2.3
 evaluate<0.2
+jsonlines
 # Dataset specific deps
 py7zr<0.19
 openpyxl<3.1
utils.py CHANGED
@@ -1,7 +1,8 @@
 from typing import Dict, Union
 
+import jsonlines
 import requests
-from huggingface_hub import HfApi, ModelFilter, dataset_info
+from huggingface_hub import HfApi, ModelFilter, Repository, dataset_info
 
 AUTOTRAIN_TASK_TO_HUB_TASK = {
     "binary_classification": "text-classification",
@@ -15,6 +16,8 @@ AUTOTRAIN_TASK_TO_HUB_TASK = {
 }
 
 HUB_TASK_TO_AUTOTRAIN_TASK = {v: k for k, v in AUTOTRAIN_TASK_TO_HUB_TASK.items()}
+LOGS_REPO = "evaluation-job-logs"
+
 
 api = HfApi()
 
@@ -86,3 +89,28 @@ def format_col_mapping(col_mapping: dict) -> dict:
         col_mapping[f"answers.{k}"] = f"answers.{v}"
     del col_mapping["answers"]
     return col_mapping
+
+
+def commit_evaluation_log(evaluation_log, hf_access_token=None):
+    logs_repo_url = f"https://huggingface.co/datasets/autoevaluate/{LOGS_REPO}"
+    logs_repo = Repository(
+        local_dir=LOGS_REPO,
+        clone_from=logs_repo_url,
+        repo_type="dataset",
+        private=True,
+        use_auth_token=hf_access_token,
+    )
+    logs_repo.git_pull()
+    with jsonlines.open(f"{LOGS_REPO}/logs.jsonl") as r:
+        lines = []
+        for obj in r:
+            lines.append(obj)
+
+    lines.append(evaluation_log)
+    with jsonlines.open(f"{LOGS_REPO}/logs.jsonl", mode="w") as writer:
+        for job in lines:
+            writer.write(job)
+    logs_repo.push_to_hub(
+        commit_message=f"Evaluation submitted with project name {evaluation_log['payload']['proj_name']}"
+    )
+    print("INFO -- Pushed evaluation logs to the Hub")