leaderboard / src /backend.py
qgallouedec's picture
qgallouedec HF staff
fix eval backend
96424ac
raw
history blame
No virus
3.01 kB
import json
import os
import re
import tempfile
from huggingface_hub import CommitOperationAdd, HfApi
from src.evaluation import evaluate
from src.logging import setup_logger
# Module-level logger named after this module.
logger = setup_logger(__name__)
# Hub API client; authenticates with the TOKEN environment variable
# (falls back to anonymous access when TOKEN is unset).
API = HfApi(token=os.environ.get("TOKEN"))
# Dataset repository on the Hub where evaluation result JSON files are stored.
RESULTS_REPO = "open-rl-leaderboard/results"
def _backend_routine():
    """Run one backend evaluation cycle.

    Lists reinforcement-learning models on the Hub, diffs them against the
    reports already stored in the results dataset, evaluates the first
    pending (model_id, sha) pair, and commits the resulting JSON report
    back to the results repository.

    Returns:
        None. Side effects only (Hub download/commit, logging).
    """
    # List the reinforcement-learning models on the Hub.
    # (Original comment said "text classification" — that was stale.)
    rl_models = list(API.list_models(filter="reinforcement-learning"))
    logger.info("Found %d RL models", len(rl_models))

    # A model is compatible iff it ships an "agent.pt" file among its siblings.
    compatible_models = []
    for model in rl_models:
        filenames = [sib.rfilename for sib in model.siblings]
        if "agent.pt" in filenames:
            compatible_models.append((model.modelId, model.sha))
    logger.info("Found %d compatible models", len(compatible_models))

    # Collect already-evaluated (model_id, model_sha) pairs from the results
    # repo. Report files live at "<org>/<name>/results_<sha>.json".
    pattern = re.compile(r"^[^/]*/[^/]*/[^/]*results_[a-f0-9]+\.json$")
    filenames = API.list_repo_files(RESULTS_REPO, repo_type="dataset")
    filenames = [filename for filename in filenames if pattern.match(filename)]

    evaluated_models = set()
    for filename in filenames:
        path = API.hf_hub_download(repo_id=RESULTS_REPO, filename=filename, repo_type="dataset")
        with open(path) as fp:
            report = json.load(fp)
        evaluated_models.add((report["config"]["model_id"], report["config"]["model_sha"]))

    # Models that have no report yet are pending evaluation.
    pending_models = list(set(compatible_models) - evaluated_models)
    logger.info("Found %d pending models", len(pending_models))
    if len(pending_models) == 0:
        return None

    # Evaluate the first pending model and push its report to the Hub.
    with tempfile.TemporaryDirectory() as tmp_dir:
        commits = []
        model_id, sha = pending_models[0]
        logger.info("Running evaluation on %s", model_id)
        report = {"config": {"model_id": model_id, "model_sha": sha}}
        try:
            evaluations = evaluate(model_id, revision=sha)
        except Exception as e:
            # Best-effort: a failed evaluation is recorded as FAILED rather
            # than crashing the whole routine.
            logger.error("Error evaluating %s: %s", model_id, e)
            evaluations = None

        if evaluations is not None:
            report["results"] = evaluations
            report["status"] = "DONE"
        else:
            report["status"] = "FAILED"

        # Write the report locally using the same path layout as the repo,
        # then commit it in a single operation.
        dumped = json.dumps(report, indent=2)
        path_in_repo = f"{model_id}/results_{sha}.json"
        local_path = os.path.join(tmp_dir, path_in_repo)
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        with open(local_path, "w") as f:
            f.write(dumped)
        commits.append(CommitOperationAdd(path_in_repo=path_in_repo, path_or_fileobj=local_path))
        API.create_commit(
            repo_id=RESULTS_REPO, commit_message="Add evaluation results", operations=commits, repo_type="dataset"
        )
def backend_routine():
    """Public entry point: run one backend cycle, logging any failure.

    Any exception raised by the inner routine is caught and logged so the
    scheduler that invokes this function keeps running.
    """
    try:
        _backend_routine()
    except Exception as err:
        logger.error(f"{err.__class__.__name__}: {str(err)}")
# Allow running this module directly as a one-shot script.
if __name__ == "__main__":
    backend_routine()