lewtun (HF staff) committed
Commit c671908
1 Parent(s): af2acd4
Files changed (2):
  1. app.py +60 -70
  2. evaluation.py +20 -0
app.py CHANGED
@@ -8,8 +8,7 @@ from datasets import get_dataset_config_names
 from dotenv import load_dotenv
 from huggingface_hub import list_datasets

-from evaluation import (EvaluationInfo, compute_evaluation_id,
-                        get_evaluation_ids)
+from evaluation import EvaluationInfo, filter_evaluated_models
 from utils import (get_compatible_models, get_key, get_metadata, http_get,
                    http_post)

@@ -247,82 +246,73 @@ with st.form(key="form"):
     selected_models = st.multiselect("Select the models you wish to evaluate", compatible_models)
     print("Selected models:", selected_models)

-    evaluation_ids = get_evaluation_ids()
-
-    for idx, model in enumerate(selected_models):
-        eval_info = EvaluationInfo(
-            task=selected_task,
-            model=model,
-            dataset_name=selected_dataset,
-            dataset_config=selected_config,
-            dataset_split=selected_split,
-        )
-        candidate_id = hash(eval_info)
-        if candidate_id in evaluation_ids:
-            st.info(f"Model {model} has already been evaluated on this configuration. Skipping ...")
-            selected_models.pop(idx)
-
+    selected_models = filter_evaluated_models(
+        selected_models, selected_task, selected_dataset, selected_config, selected_split
+    )
     print("Selected models:", selected_models)

     submit_button = st.form_submit_button("Make submission")

     if submit_button:
-        project_id = str(uuid.uuid4())[:3]
-        payload = {
-            "username": AUTOTRAIN_USERNAME,
-            "proj_name": f"my-eval-project-{project_id}",
-            "task": TASK_TO_ID[selected_task],
-            "config": {
-                "language": "en",
-                "max_models": 5,
-                "instance": {
-                    "provider": "aws",
-                    "instance_type": "ml.g4dn.4xlarge",
-                    "max_runtime_seconds": 172800,
-                    "num_instances": 1,
-                    "disk_size_gb": 150,
-                },
-                "evaluation": {
-                    "metrics": [],
-                    "models": selected_models,
-                },
-            },
-        }
-        print(f"Payload: {payload}")
-        project_json_resp = http_post(
-            path="/projects/create", payload=payload, token=HF_TOKEN, domain=AUTOTRAIN_BACKEND_API
-        ).json()
-        print(project_json_resp)
-
-        if project_json_resp["created"]:
+        if len(selected_models) > 0:
+            project_id = str(uuid.uuid4())[:3]
             payload = {
-                "split": 4, # use "auto" split choice in AutoTrain
-                "col_mapping": col_mapping,
-                "load_config": {"max_size_bytes": 0, "shuffle": False},
+                "username": AUTOTRAIN_USERNAME,
+                "proj_name": f"my-eval-project-{project_id}",
+                "task": TASK_TO_ID[selected_task],
+                "config": {
+                    "language": "en",
+                    "max_models": 5,
+                    "instance": {
+                        "provider": "aws",
+                        "instance_type": "ml.g4dn.4xlarge",
+                        "max_runtime_seconds": 172800,
+                        "num_instances": 1,
+                        "disk_size_gb": 150,
+                    },
+                    "evaluation": {
+                        "metrics": [],
+                        "models": selected_models,
+                    },
+                },
             }
-            data_json_resp = http_post(
-                path=f"/projects/{project_json_resp['id']}/data/{selected_dataset}",
-                payload=payload,
-                token=HF_TOKEN,
-                domain=AUTOTRAIN_BACKEND_API,
-                params={"type": "dataset", "config_name": selected_config, "split_name": selected_split},
+            print(f"Payload: {payload}")
+            project_json_resp = http_post(
+                path="/projects/create", payload=payload, token=HF_TOKEN, domain=AUTOTRAIN_BACKEND_API
             ).json()
-            print(data_json_resp)
-            if data_json_resp["download_status"] == 1:
-                train_json_resp = http_get(
-                    path=f"/projects/{project_json_resp['id']}/data/start_process",
+            print(project_json_resp)
+
+            if project_json_resp["created"]:
+                payload = {
+                    "split": 4, # use "auto" split choice in AutoTrain
+                    "col_mapping": col_mapping,
+                    "load_config": {"max_size_bytes": 0, "shuffle": False},
+                }
+                data_json_resp = http_post(
+                    path=f"/projects/{project_json_resp['id']}/data/{selected_dataset}",
+                    payload=payload,
                     token=HF_TOKEN,
                     domain=AUTOTRAIN_BACKEND_API,
+                    params={"type": "dataset", "config_name": selected_config, "split_name": selected_split},
                 ).json()
-                print(train_json_resp)
-                if train_json_resp["success"]:
-                    st.success(f"✅ Successfully submitted evaluation job with project ID {project_id}")
-                    st.markdown(
-                        f"""
-                        Evaluation takes appoximately 1 hour to complete, so grab a ☕ or 🍵 while you wait:
-
-                        * 📊 Click [here](https://huggingface.co/spaces/autoevaluate/leaderboards) to view the results from your submission
-                        """
-                    )
-                else:
-                    st.error("🙈 Oh noes, there was an error submitting your submission!")
+                print(data_json_resp)
+                if data_json_resp["download_status"] == 1:
+                    train_json_resp = http_get(
+                        path=f"/projects/{project_json_resp['id']}/data/start_process",
+                        token=HF_TOKEN,
+                        domain=AUTOTRAIN_BACKEND_API,
+                    ).json()
+                    print(train_json_resp)
+                    if train_json_resp["success"]:
+                        st.success(f"✅ Successfully submitted evaluation job with project ID {project_id}")
+                        st.markdown(
+                            f"""
+                            Evaluation takes appoximately 1 hour to complete, so grab a ☕ or 🍵 while you wait:
+
+                            * 📊 Click [here](https://huggingface.co/spaces/autoevaluate/leaderboards) to view the results from your submission
+                            """
+                        )
+                    else:
+                        st.error("🙈 Oh noes, there was an error submitting your evaluation job!")
+                else:
+                    st.warning("⚠️ No models were selected for evaluation!")
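Note on how the duplicate check above works: both the removed inline loop and the new filter_evaluated_models helper compare hash(EvaluationInfo(...)) against the IDs returned by get_evaluation_ids(), which calls compute_evaluation_id(dset) on every dataset under the autoevaluate org. Neither EvaluationInfo nor compute_evaluation_id is touched by this commit, so their definitions are not shown here. The following is only a rough sketch of how they could fit together; the field names come from the constructor calls in the diff, but the frozen-dataclass detail and the "eval_info" card-metadata key are assumptions, not the actual implementation:

from dataclasses import dataclass

from huggingface_hub.hf_api import DatasetInfo


@dataclass(frozen=True)  # frozen (with the default eq=True) makes instances hashable
class EvaluationInfo:
    task: str
    model: str
    dataset_name: str
    dataset_config: str
    dataset_split: str


def compute_evaluation_id(dataset_info: DatasetInfo) -> int:
    # Assumption: each autoevaluate results dataset stores the submitted
    # evaluation parameters (task, model, dataset_name, ...) in its card data.
    metadata = dataset_info.cardData["eval_info"]
    return hash(EvaluationInfo(**metadata))

Under a scheme like this, the hash computed in filter_evaluated_models and the ID derived from each results dataset are directly comparable, which is what makes the "already evaluated" check possible.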
evaluation.py CHANGED
@@ -1,5 +1,6 @@
 from dataclasses import dataclass

+import streamlit as st
 from huggingface_hub import DatasetFilter, HfApi
 from huggingface_hub.hf_api import DatasetInfo

@@ -24,3 +25,22 @@ def get_evaluation_ids():
     filt = DatasetFilter(author="autoevaluate")
     evaluation_datasets = HfApi().list_datasets(filter=filt, full=True)
     return [compute_evaluation_id(dset) for dset in evaluation_datasets]
+
+
+def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split):
+    evaluation_ids = get_evaluation_ids()
+
+    for idx, model in enumerate(models):
+        evaluation_info = EvaluationInfo(
+            task=task,
+            model=model,
+            dataset_name=dataset_name,
+            dataset_config=dataset_config,
+            dataset_split=dataset_split,
+        )
+        candidate_id = hash(evaluation_info)
+        if candidate_id in evaluation_ids:
+            st.info(f"Model {model} has already been evaluated on this configuration. Skipping evaluation...")
+            models.pop(idx)
+
+    return models
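One caveat with the new helper, which this commit does not address: filter_evaluated_models pops entries out of models while enumerating it, so every removal shifts the remaining items and the loop skips the model that immediately follows an already-evaluated one (two consecutive duplicates would leave the second in the returned list). A possible fix, sketched here purely as an illustration and not part of this change, is to build a new list instead of mutating the input:

def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split):
    evaluation_ids = set(get_evaluation_ids())
    kept = []
    for model in models:
        evaluation_info = EvaluationInfo(
            task=task,
            model=model,
            dataset_name=dataset_name,
            dataset_config=dataset_config,
            dataset_split=dataset_split,
        )
        # Keep only models whose evaluation ID has not been seen before.
        if hash(evaluation_info) in evaluation_ids:
            st.info(f"Model {model} has already been evaluated on this configuration. Skipping evaluation...")
        else:
            kept.append(model)
    return kept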