Tristan Thrush committed on
Commit 2dfb09c
2 Parent(s): 2d74fdd e0edd3d

resolved conflict

Files changed (6)
  1. .github/workflows/quality.yml +29 -0
  2. Makefile +8 -0
  3. app.py +36 -18
  4. evaluation.py +46 -0
  5. pyproject.toml +2 -0
  6. utils.py +13 -4
.github/workflows/quality.yml ADDED
@@ -0,0 +1,29 @@
+name: Code quality
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+
+  check_code_quality:
+    name: Check code quality
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+      - name: Setup Python environment
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install black isort flake8
+      - name: Code quality
+        run: |
+          make quality
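Note that the workflow installs only the three linters and then delegates to `make quality`, so the CI check and a local `make quality` run stay in sync; it fires on pushes and pull requests targeting `main`, under Python 3.9.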
Makefile ADDED
@@ -0,0 +1,8 @@
+style:
+	python -m black --line-length 119 --target-version py39 .
+	python -m isort .
+
+quality:
+	python -m black --check --line-length 119 --target-version py39 .
+	python -m isort --check-only .
+	python -m flake8 --max-line-length 119
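Of the two targets, `make style` rewrites files in place, while `make quality` is the check-only variant the workflow above runs in CI; the 119-character line length is applied consistently to both black and flake8.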
app.py CHANGED
@@ -10,8 +10,8 @@ from huggingface_hub import list_datasets
 from tqdm import tqdm
 import inspect
 
-from utils import (get_compatible_models, get_key, get_metadata, http_get,
-                   http_post)
+from evaluation import filter_evaluated_models
+from utils import get_compatible_models, get_key, get_metadata, http_get, http_post
 
 if Path(".env").is_file():
     load_dotenv(".env")
@@ -28,7 +28,7 @@ TASK_TO_ID = {
     # "multi_label_classification": 3, # Not fully supported in AutoTrain
     "entity_extraction": 4,
     "extractive_question_answering": 5,
-    "translation": 6,
+    # "translation": 6, # Not fully supported in AutoTrain evaluation
     "summarization": 8,
 }
@@ -77,14 +77,14 @@ def get_supported_metrics():
 supported_metrics = get_supported_metrics()
 
 
-###########
-### APP ###
-###########
+#######
+# APP #
+#######
 st.title("Evaluation as a Service")
 st.markdown(
     """
     Welcome to Hugging Face's Evaluation as a Service! This application allows
-    you to evaluate any πŸ€— Transformers model with a dataset on the Hub. Please
+    you to evaluate πŸ€— Transformers models with a dataset on the Hub. Please
     select the dataset and configuration below. The results of your evaluation
     will be displayed on the public leaderboard
    [here](https://huggingface.co/spaces/autoevaluate/leaderboards).
@@ -107,18 +107,22 @@ if metadata is None:
     st.warning("No evaluation metadata found. Please configure the evaluation job below.")
 
 with st.expander("Advanced configuration"):
-    ## Select task
+    # Select task
     selected_task = st.selectbox(
         "Select a task",
         SUPPORTED_TASKS,
         index=SUPPORTED_TASKS.index(metadata[0]["task_id"]) if metadata is not None else 0,
     )
-    ### Select config
+    # Select config
     configs = get_dataset_config_names(selected_dataset)
     selected_config = st.selectbox("Select a config", configs)
 
-    ## Select splits
-    splits_resp = http_get(path="/splits", domain=DATASETS_PREVIEW_API, params={"dataset": selected_dataset})
+    # Select splits
+    splits_resp = http_get(
+        path="/splits",
+        domain=DATASETS_PREVIEW_API,
+        params={"dataset": selected_dataset},
+    )
     if splits_resp.status_code == 200:
         split_names = []
         all_splits = splits_resp.json()
@@ -132,11 +136,15 @@ with st.expander("Advanced configuration"):
             index=split_names.index(metadata[0]["splits"]["eval_split"]) if metadata is not None else 0,
         )
 
-    ## Select columns
+    # Select columns
     rows_resp = http_get(
         path="/rows",
         domain=DATASETS_PREVIEW_API,
-        params={"dataset": selected_dataset, "config": selected_config, "split": selected_split},
+        params={
+            "dataset": selected_dataset,
+            "config": selected_config,
+            "split": selected_split,
+        },
     ).json()
     col_names = list(pd.json_normalize(rows_resp["rows"][0]["row"]).columns)
 
@@ -178,7 +186,7 @@ with st.expander("Advanced configuration"):
         st.markdown("`tags` column")
         with col2:
             tokens_col = st.selectbox(
-                "This column should contain the parts of the text (as an array of tokens) you want to assign labels to",
+                "This column should contain the array of tokens",
                 col_names,
                 index=col_names.index(get_key(metadata[0]["col_mapping"], "tokens")) if metadata is not None else 0,
             )
@@ -322,7 +330,10 @@ with st.form(key="form"):
             }
             print(f"Payload: {payload}")
             project_json_resp = http_post(
-                path="/projects/create", payload=payload, token=HF_TOKEN, domain=AUTOTRAIN_BACKEND_API
+                path="/projects/create",
+                payload=payload,
+                token=HF_TOKEN,
+                domain=AUTOTRAIN_BACKEND_API,
             ).json()
             print(project_json_resp)
@@ -337,7 +348,11 @@ with st.form(key="form"):
                 payload=payload,
                 token=HF_TOKEN,
                 domain=AUTOTRAIN_BACKEND_API,
-                params={"type": "dataset", "config_name": selected_config, "split_name": selected_split},
+                params={
+                    "type": "dataset",
+                    "config_name": selected_config,
+                    "split_name": selected_split,
+                },
             ).json()
             print(data_json_resp)
             if data_json_resp["download_status"] == 1:
@@ -353,8 +368,11 @@ with st.form(key="form"):
                 f"""
                 Evaluation takes approximately 1 hour to complete, so grab a β˜• or 🍡 while you wait:
 
-                * πŸ“Š Click [here](https://huggingface.co/spaces/autoevaluate/leaderboards) to view the results from your submission
+                πŸ“Š Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) \
+                    to view the results from your submission
                 """
             )
         else:
-            st.error("πŸ™ˆ Oh noes, there was an error submitting your submission!")
+            st.error("πŸ™ˆ Oh noes, there was an error submitting your evaluation job!")
+    else:
+        st.warning("⚠️ No models were selected for evaluation!")
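The new `filter_evaluated_models` import suggests the form handler prunes already-evaluated models before creating an AutoTrain project. A minimal, self-contained sketch of that wiring, where the model and dataset values are illustrative stand-ins for the app's selectbox state (not verbatim app code):

```python
import streamlit as st

from evaluation import filter_evaluated_models

# Hypothetical values standing in for the app's selectbox state.
selected_models = ["distilbert-base-uncased-finetuned-sst-2-english"]

# Drop models that already have results for this exact task/dataset/config/split.
models_to_submit = filter_evaluated_models(
    selected_models,
    task="binary_classification",
    dataset_name="sst2",
    dataset_config="default",
    dataset_split="validation",
)
if len(models_to_submit) == 0:
    st.warning("⚠️ No models were selected for evaluation!")
```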
evaluation.py ADDED
@@ -0,0 +1,46 @@
+from dataclasses import dataclass
+
+import streamlit as st
+from huggingface_hub import DatasetFilter, HfApi
+from huggingface_hub.hf_api import DatasetInfo
+
+
+@dataclass(frozen=True, eq=True)
+class EvaluationInfo:
+    task: str
+    model: str
+    dataset_name: str
+    dataset_config: str
+    dataset_split: str
+
+
+def compute_evaluation_id(dataset_info: DatasetInfo) -> int:
+    metadata = dataset_info.cardData["eval_info"]
+    metadata.pop("col_mapping", None)
+    evaluation_info = EvaluationInfo(**metadata)
+    return hash(evaluation_info)
+
+
+def get_evaluation_ids():
+    filt = DatasetFilter(author="autoevaluate")
+    evaluation_datasets = HfApi().list_datasets(filter=filt, full=True)
+    return [compute_evaluation_id(dset) for dset in evaluation_datasets]
+
+
+def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split):
+    evaluation_ids = get_evaluation_ids()
+
+    for idx, model in enumerate(models):
+        evaluation_info = EvaluationInfo(
+            task=task,
+            model=model,
+            dataset_name=dataset_name,
+            dataset_config=dataset_config,
+            dataset_split=dataset_split,
+        )
+        candidate_id = hash(evaluation_info)
+        if candidate_id in evaluation_ids:
+            st.info(f"Model {model} has already been evaluated on this configuration. Skipping evaluation...")
+            models.pop(idx)
+
+    return models
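One caveat worth noting in `filter_evaluated_models`: calling `models.pop(idx)` while iterating with `enumerate` shifts the remaining elements, so the model that slides into the popped slot is never checked; two already-evaluated models in a row would leave the second one in the list. A side-effect-free variant that avoids the in-place mutation, sketched against the same `EvaluationInfo` and `get_evaluation_ids` definitions above:

```python
def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split):
    evaluation_ids = set(get_evaluation_ids())

    kept = []
    for model in models:
        # Hash the candidate evaluation and keep the model only if no
        # existing result dataset matches it.
        candidate_id = hash(
            EvaluationInfo(
                task=task,
                model=model,
                dataset_name=dataset_name,
                dataset_config=dataset_config,
                dataset_split=dataset_split,
            )
        )
        if candidate_id in evaluation_ids:
            st.info(f"Model {model} has already been evaluated on this configuration. Skipping evaluation...")
        else:
            kept.append(model)
    return kept
```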
pyproject.toml ADDED
@@ -0,0 +1,2 @@
+[tool.isort]
+profile = "black"
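The `black` profile makes isort's import wrapping agree with black, so the two Makefile targets above don't undo each other's work. Illustratively, an over-long import like the one removed from app.py would be wrapped in black's parenthesized, trailing-comma style:

```python
# With profile = "black", isort emits this wrapping for long imports:
from utils import (
    get_compatible_models,
    get_key,
    get_metadata,
    http_get,
    http_post,
)
```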
utils.py CHANGED
@@ -1,7 +1,7 @@
 from typing import Dict, Union
 
 import requests
-from huggingface_hub import DatasetFilter, HfApi, ModelFilter
+from huggingface_hub import HfApi, ModelFilter
 
 AUTOTRAIN_TASK_TO_HUB_TASK = {
     "binary_classification": "text-classification",
@@ -27,7 +27,11 @@ def http_post(path: str, token: str, payload=None, domain: str = None, params=No
     """HTTP POST request to the AutoNLP API, raises UnreachableAPIError if the API cannot be reached"""
     try:
         response = requests.post(
-            url=domain + path, json=payload, headers=get_auth_headers(token=token), allow_redirects=True, params=params
+            url=domain + path,
+            json=payload,
+            headers=get_auth_headers(token=token),
+            allow_redirects=True,
+            params=params,
         )
     except requests.exceptions.ConnectionError:
         print("❌ Failed to reach AutoNLP API, check your internet connection")
@@ -39,7 +43,10 @@ def http_get(path: str, domain: str, token: str = None, params: dict = None) ->
     """HTTP GET request to the AutoNLP API, raises UnreachableAPIError if the API cannot be reached"""
     try:
         response = requests.get(
-            url=domain + path, headers=get_auth_headers(token=token), allow_redirects=True, params=params
+            url=domain + path,
+            headers=get_auth_headers(token=token),
+            allow_redirects=True,
+            params=params,
         )
     except requests.exceptions.ConnectionError:
         print("❌ Failed to reach AutoNLP API, check your internet connection")
@@ -58,7 +65,9 @@ def get_metadata(dataset_name: str) -> Union[Dict, None]:
 def get_compatible_models(task, dataset_name):
     # TODO: relax filter on PyTorch models once supported in AutoTrain
     filt = ModelFilter(
-        task=AUTOTRAIN_TASK_TO_HUB_TASK[task], trained_dataset=dataset_name, library=["transformers", "pytorch"]
+        task=AUTOTRAIN_TASK_TO_HUB_TASK[task],
+        trained_dataset=dataset_name,
+        library=["transformers", "pytorch"],
     )
     compatible_models = api.list_models(filter=filt)
     return [model.modelId for model in compatible_models]
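For reference, a minimal usage sketch of the reformatted helpers, assuming the public datasets-server endpoint that the app's `DATASETS_PREVIEW_API` most likely points at (the exact value is configured elsewhere in the app and is an assumption here):

```python
from utils import http_get

DATASETS_PREVIEW_API = "https://datasets-server.huggingface.co"  # assumed value

# Fetch the available splits for a dataset, mirroring the call in app.py.
splits_resp = http_get(
    path="/splits",
    domain=DATASETS_PREVIEW_API,
    params={"dataset": "imdb"},
)
if splits_resp.status_code == 200:
    print(splits_resp.json())
```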