leaderboard / src /backend.py
qgallouedec's picture
qgallouedec HF staff
fix eval backend
96424ac
raw
history blame
No virus
3.01 kB
import json
import os
import re
import tempfile
from huggingface_hub import CommitOperationAdd, HfApi
from src.evaluation import evaluate
from src.logging import setup_logger
# Module-level logger named after this module.
logger = setup_logger(__name__)
# Hub API client; authenticates with the TOKEN environment variable
# (falls back to anonymous access when TOKEN is unset).
API = HfApi(token=os.environ.get("TOKEN"))
# Dataset repository on the Hub where evaluation result JSON files are stored.
RESULTS_REPO = "open-rl-leaderboard/results"
def _backend_routine():
    """Run one backend evaluation cycle.

    Lists reinforcement-learning models on the Hub, diffs them against the
    reports already stored in the results dataset, evaluates the first
    pending (model_id, sha) pair, and commits the resulting JSON report
    back to the results repository.

    Returns:
        None. Side effects only (Hub download/commit, logging).
    """
    # List the reinforcement-learning models on the Hub.
    # (Original comment said "text classification" — that was stale.)
    rl_models = list(API.list_models(filter="reinforcement-learning"))
    logger.info("Found %d RL models", len(rl_models))

    # A model is compatible iff it ships an "agent.pt" file among its siblings.
    compatible_models = []
    for model in rl_models:
        filenames = [sib.rfilename for sib in model.siblings]
        if "agent.pt" in filenames:
            compatible_models.append((model.modelId, model.sha))
    logger.info("Found %d compatible models", len(compatible_models))

    # Collect already-evaluated (model_id, model_sha) pairs from the results
    # repo. Report files live at "<org>/<name>/results_<sha>.json".
    pattern = re.compile(r"^[^/]*/[^/]*/[^/]*results_[a-f0-9]+\.json$")
    filenames = API.list_repo_files(RESULTS_REPO, repo_type="dataset")
    filenames = [filename for filename in filenames if pattern.match(filename)]

    evaluated_models = set()
    for filename in filenames:
        path = API.hf_hub_download(repo_id=RESULTS_REPO, filename=filename, repo_type="dataset")
        with open(path) as fp:
            report = json.load(fp)
        evaluated_models.add((report["config"]["model_id"], report["config"]["model_sha"]))

    # Models that have no report yet are pending evaluation.
    pending_models = list(set(compatible_models) - evaluated_models)
    logger.info("Found %d pending models", len(pending_models))
    if len(pending_models) == 0:
        return None

    # Evaluate the first pending model and push its report to the Hub.
    with tempfile.TemporaryDirectory() as tmp_dir:
        commits = []
        model_id, sha = pending_models[0]
        logger.info("Running evaluation on %s", model_id)
        report = {"config": {"model_id": model_id, "model_sha": sha}}
        try:
            evaluations = evaluate(model_id, revision=sha)
        except Exception as e:
            # Best-effort: a failed evaluation is recorded as FAILED rather
            # than crashing the whole routine.
            logger.error("Error evaluating %s: %s", model_id, e)
            evaluations = None

        if evaluations is not None:
            report["results"] = evaluations
            report["status"] = "DONE"
        else:
            report["status"] = "FAILED"

        # Write the report locally using the same path layout as the repo,
        # then commit it in a single operation.
        dumped = json.dumps(report, indent=2)
        path_in_repo = f"{model_id}/results_{sha}.json"
        local_path = os.path.join(tmp_dir, path_in_repo)
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        with open(local_path, "w") as f:
            f.write(dumped)
        commits.append(CommitOperationAdd(path_in_repo=path_in_repo, path_or_fileobj=local_path))
        API.create_commit(
            repo_id=RESULTS_REPO, commit_message="Add evaluation results", operations=commits, repo_type="dataset"
        )
def backend_routine():
    """Public entry point: run one backend cycle, logging any failure.

    Any exception raised by the inner routine is caught and logged so the
    scheduler that invokes this function keeps running.
    """
    try:
        _backend_routine()
    except Exception as err:
        logger.error(f"{err.__class__.__name__}: {str(err)}")
# Allow running this module directly as a one-shot script.
if __name__ == "__main__":
    backend_routine()