Spaces:
Runtime error
Runtime error
File size: 4,323 Bytes
348bdab 6c14077 e502d68 0ba78e9 e502d68 0ba78e9 8dec3b6 e502d68 8dec3b6 0ba78e9 6c14077 0ba78e9 0779c9b 6c14077 1e40fe5 6c14077 0779c9b 0ba78e9 1e40fe5 0ba78e9 6c14077 31b9ddb 6c14077 0ba78e9 348bdab 675f890 5b19fc7 675f890 e502d68 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
from typing import Dict, List, Union
import jsonlines
import requests
from huggingface_hub import HfApi, ModelFilter, Repository, dataset_info
# AutoTrain task name -> Hub task tag used when filtering models.
# Several AutoTrain tasks may share one Hub tag (both classification variants
# map to "text-classification").
AUTOTRAIN_TASK_TO_HUB_TASK = dict(
    binary_classification="text-classification",
    multi_class_classification="text-classification",
    # "multi_label_classification": "text-classification", # Not fully supported in AutoTrain
    entity_extraction="token-classification",
    extractive_question_answering="question-answering",
    translation="translation",
    summarization="summarization",
    # "single_column_regression": 10,
)

# Reverse lookup: Hub task tag -> AutoTrain task name. When several AutoTrain
# tasks share a Hub tag, the entry defined last above wins.
HUB_TASK_TO_AUTOTRAIN_TASK = dict(
    (hub_task, autotrain_task)
    for autotrain_task, hub_task in AUTOTRAIN_TASK_TO_HUB_TASK.items()
)

# Name of the dataset repository (and local clone directory) holding job logs.
LOGS_REPO = "evaluation-job-logs"
def get_auth_headers(token: str, prefix: str = "autonlp"):
    """Build the HTTP `Authorization` header for an API request.

    Args:
        token: Access token placed after the scheme prefix.
        prefix: Authorization scheme written before the token.

    Returns:
        A single-entry dict mapping `"Authorization"` to `"<prefix> <token>"`.
    """
    credentials = "{} {}".format(prefix, token)
    return dict(Authorization=credentials)
def http_post(path: str, token: str, payload=None, domain: str = None, params=None) -> requests.Response:
    """Send an authenticated HTTP POST request to `domain + path`.

    Args:
        path: URL path appended to `domain`.
        token: Access token sent in the `Authorization` header.
        payload: Optional object sent as the JSON request body.
        domain: Base URL of the API. It defaults to `None` only to keep the
            signature stable; a string is required because it is concatenated
            with `path`.
        params: Optional query-string parameters.

    Returns:
        The response object, once its status has been checked.

    Raises:
        requests.exceptions.ConnectionError: If the API cannot be reached.
        requests.exceptions.HTTPError: If the response carries an error status.
    """
    try:
        response = requests.post(
            url=domain + path,
            json=payload,
            headers=get_auth_headers(token=token),
            allow_redirects=True,
            params=params,
        )
    except requests.exceptions.ConnectionError:
        print("❌ Failed to reach AutoNLP API, check your internet connection")
        # Re-raise: without this, execution would continue to
        # `response.raise_for_status()` with `response` never assigned and the
        # real cause would be hidden behind an UnboundLocalError.
        raise
    response.raise_for_status()
    return response
def http_get(path: str, domain: str, token: str = None, params: dict = None) -> requests.Response:
    """Send an authenticated HTTP GET request to `domain + path`.

    Args:
        path: URL path appended to `domain`.
        domain: Base URL of the API.
        token: Access token sent in the `Authorization` header.
        params: Optional query-string parameters.

    Returns:
        The response object, once its status has been checked.

    Raises:
        requests.exceptions.ConnectionError: If the endpoint cannot be reached.
        requests.exceptions.HTTPError: If the response carries an error status.
    """
    # NOTE(review): when `token` is None the header value becomes
    # "autonlp None"; confirm whether unauthenticated calls are intended.
    try:
        response = requests.get(
            url=domain + path,
            headers=get_auth_headers(token=token),
            allow_redirects=True,
            params=params,
        )
    except requests.exceptions.ConnectionError:
        print(f"❌ Failed to reach {path}, check your internet connection")
        # Re-raise: without this, execution would continue to
        # `response.raise_for_status()` with `response` never assigned and the
        # real cause would be hidden behind an UnboundLocalError.
        raise
    response.raise_for_status()
    return response
def get_metadata(dataset_name: str) -> Union[Dict, None]:
    """Fetch the `train-eval-index` entry from a dataset's card data.

    Args:
        dataset_name: Identifier passed to `dataset_info`.

    Returns:
        The value stored under `"train-eval-index"` in the dataset's card
        data, or `None` when there is no card data or no such entry.
    """
    card_data = dataset_info(dataset_name).cardData
    # Guard clause: nothing to return without card data or the index entry.
    if card_data is None or "train-eval-index" not in card_data.keys():
        return None
    return card_data["train-eval-index"]
def get_compatible_models(task: str, dataset_ids: List[str]) -> List[str]:
    """
    Returns all model IDs that are compatible with the given task and dataset IDs.

    Args:
        task (`str`): The AutoTrain task to search for; must be a key of
            `AUTOTRAIN_TASK_TO_HUB_TASK`.
        dataset_ids (`List[str]`): A list of dataset IDs to search for.

    Returns:
        A list of unique model IDs, sorted alphabetically.

    Raises:
        KeyError: If `task` is unknown and `dataset_ids` is non-empty.
    """
    # TODO: relax filter on PyTorch models if TensorFlow supported in AutoTrain
    api = HfApi()
    # A set is used because a model trained on several of the requested
    # datasets is returned once per dataset and would otherwise be duplicated.
    compatible_model_ids = set()
    for dataset_id in dataset_ids:
        model_filter = ModelFilter(
            task=AUTOTRAIN_TASK_TO_HUB_TASK[task],
            trained_dataset=dataset_id,
            library=["transformers", "pytorch"],
        )
        compatible_model_ids.update(model.modelId for model in api.list_models(filter=model_filter))
    return sorted(compatible_model_ids)
def get_key(col_mapping, val):
    """Reverse-lookup the first key in `col_mapping` whose value equals `val`.

    Args:
        col_mapping: Mapping to search, in iteration order.
        val: Value to look for.

    Returns:
        The first matching key, or the string `"key doesn't exist"` when no
        value matches.
    """
    matching_keys = (name for name, mapped in col_mapping.items() if val == mapped)
    return next(matching_keys, "key doesn't exist")
def format_col_mapping(col_mapping: dict) -> dict:
    """Flatten the nested `"answers"` entry of a column mapping in place.

    Each `key: value` pair under `col_mapping["answers"]` becomes a top-level
    `"answers.<key>": "answers.<value>"` entry, after which the nested
    `"answers"` entry is removed.

    Args:
        col_mapping: Mapping that contains an `"answers"` sub-mapping. It is
            modified in place.

    Returns:
        The same `col_mapping` object, after flattening.

    Raises:
        KeyError: If `col_mapping` has no `"answers"` entry.
    """
    nested_answers = col_mapping["answers"]
    flattened = {
        f"answers.{source}": f"answers.{target}"
        for source, target in nested_answers.items()
    }
    col_mapping.update(flattened)
    del col_mapping["answers"]
    return col_mapping
def commit_evaluation_log(evaluation_log, hf_access_token=None):
    """Append an evaluation log entry to `logs.jsonl` and push it to the Hub.

    Clones the `autoevaluate/<LOGS_REPO>` dataset repository into a local
    `LOGS_REPO` directory, pulls it, rewrites `logs.jsonl` with the existing
    entries followed by `evaluation_log`, and pushes the result.

    Args:
        evaluation_log: Log entry to record. Its `["payload"]["proj_name"]`
            value is used in the commit message.
        hf_access_token: Token passed to `Repository` as `use_auth_token`.
    """
    log_file = f"{LOGS_REPO}/logs.jsonl"
    repo = Repository(
        local_dir=LOGS_REPO,
        clone_from=f"https://huggingface.co/datasets/autoevaluate/{LOGS_REPO}",
        repo_type="dataset",
        private=True,
        use_auth_token=hf_access_token,
    )
    repo.git_pull()

    # Load every existing entry, then add the new one at the end.
    with jsonlines.open(log_file) as reader:
        jobs = list(reader)
    jobs.append(evaluation_log)

    # Rewrite the whole file with the extended list of entries.
    with jsonlines.open(log_file, mode="w") as writer:
        writer.write_all(jobs)

    project_name = evaluation_log['payload']['proj_name']
    repo.push_to_hub(commit_message=f"Evaluation submitted with project name {project_name}")
    print("INFO -- Pushed evaluation logs to the Hub")
|