Spaces:
Sleeping
Sleeping
import argparse | |
import logging | |
import pprint | |
import os | |
from huggingface_hub import snapshot_download | |
import src.backend.run_eval_suite as run_eval_suite | |
import src.backend.manage_requests as manage_requests | |
import src.backend.sort_queue as sort_queue | |
import src.envs as envs | |
# Configure the PyTorch CUDA allocator through its environment variable.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Root logger threshold: only ERROR and above are emitted.
logging.basicConfig(level=logging.ERROR)

# Shared pretty-printer used when echoing eval requests to stdout.
pp = pprint.PrettyPrinter(width=80)

# Status labels attached to evaluation requests.
PENDING_STATUS = "PENDING"
RUNNING_STATUS = "RUNNING"
FINISHED_STATUS = "FINISHED"
FAILED_STATUS = "FAILED"

# Download the "main" revision of the results repo and then the queue repo
# (both dataset repos) into their local backend directories. This runs at
# import time, results first, exactly as two back-to-back calls would.
for _repo_id, _local_dir in (
    (envs.RESULTS_REPO, envs.EVAL_RESULTS_PATH_BACKEND),
    (envs.QUEUE_REPO, envs.EVAL_REQUESTS_PATH_BACKEND),
):
    snapshot_download(
        repo_id=_repo_id,
        revision="main",
        local_dir=_local_dir,
        repo_type="dataset",
        max_workers=60,
    )
# exit() | |
# def run_auto_eval(args): | |
# if not args.reproduce: | |
# current_pending_status = [PENDING_STATUS] | |
# print('_________________') | |
# manage_requests.check_completed_evals( | |
# api=envs.API, | |
# checked_status=RUNNING_STATUS, | |
# completed_status=FINISHED_STATUS, | |
# failed_status=FAILED_STATUS, | |
# hf_repo=envs.QUEUE_REPO, | |
# local_dir=envs.EVAL_REQUESTS_PATH_BACKEND, | |
# hf_repo_results=envs.RESULTS_REPO, | |
# local_dir_results=envs.EVAL_RESULTS_PATH_BACKEND | |
# ) | |
# logging.info("Checked completed evals") | |
# eval_requests = manage_requests.get_eval_requests(job_status=current_pending_status, | |
# hf_repo=envs.QUEUE_REPO, | |
# local_dir=envs.EVAL_REQUESTS_PATH_BACKEND) | |
# logging.info("Got eval requests") | |
# eval_requests = sort_queue.sort_models_by_priority(api=envs.API, models=eval_requests) | |
# logging.info("Sorted eval requests") | |
# | |
# print(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests") | |
# print(eval_requests) | |
# if len(eval_requests) == 0: | |
# print("No eval requests found. Exiting.") | |
# return | |
# | |
# if args.model is not None: | |
# eval_request = manage_requests.EvalRequest( | |
# model=args.model, | |
# status=PENDING_STATUS, | |
# precision=args.precision | |
# ) | |
# pp.pprint(eval_request) | |
# else: | |
# eval_request = eval_requests[0] | |
# pp.pprint(eval_request) | |
# | |
# # manage_requests.set_eval_request( | |
# # api=envs.API, | |
# # eval_request=eval_request, | |
# # new_status=RUNNING_STATUS, | |
# # hf_repo=envs.QUEUE_REPO, | |
# # local_dir=envs.EVAL_REQUESTS_PATH_BACKEND | |
# # ) | |
# # logging.info("Set eval request to running, now running eval") | |
# | |
# run_eval_suite.run_evaluation( | |
# eval_request=eval_request, | |
# local_dir=envs.EVAL_RESULTS_PATH_BACKEND, | |
# results_repo=envs.RESULTS_REPO, | |
# batch_size=1, | |
# device=envs.DEVICE, | |
# no_cache=True, | |
# need_check=not args.publish, | |
# write_results=args.update | |
# ) | |
# logging.info("Eval finished, now setting status to finished") | |
# else: | |
# eval_request = manage_requests.EvalRequest( | |
# model=args.model, | |
# status=PENDING_STATUS, | |
# precision=args.precision | |
# ) | |
# pp.pprint(eval_request) | |
# logging.info("Running reproducibility eval") | |
# | |
# run_eval_suite.run_evaluation( | |
# eval_request=eval_request, | |
# local_dir=envs.EVAL_RESULTS_PATH_BACKEND, | |
# results_repo=envs.RESULTS_REPO, | |
# batch_size=1, | |
# device=envs.DEVICE, | |
# need_check=not args.publish, | |
# write_results=args.update | |
# ) | |
# logging.info("Reproducibility eval finished") | |
def run_auto_eval(args):
    """Evaluate all pending requests, or re-run one model for reproducibility.

    When ``args.reproduce`` is falsy, the pending requests are fetched,
    sorted and evaluated one by one. When it is truthy, a single ad-hoc
    request built from ``args.model`` / ``args.precision`` is evaluated and
    no request status is written.

    Args:
        args: Parsed command-line namespace. The attributes read are
            ``reproduce``, ``model``, ``precision``, ``publish`` and
            ``update``.

    Raises:
        ValueError: If ``args.reproduce`` is set but ``args.model`` is None.
    """
    if args.reproduce:
        _run_reproducibility_eval(args)
    else:
        _run_pending_evals(args)


def _run_pending_evals(args):
    """Evaluate each PENDING request and record FINISHED or FAILED for it."""
    current_pending_status = [PENDING_STATUS]
    print('_________________')

    # Invoked for requests currently in RUNNING status before new work is
    # picked up; the exact reconciliation logic lives in manage_requests.
    manage_requests.check_completed_evals(
        api=envs.API,
        checked_status=RUNNING_STATUS,
        completed_status=FINISHED_STATUS,
        failed_status=FAILED_STATUS,
        hf_repo=envs.QUEUE_REPO,
        local_dir=envs.EVAL_REQUESTS_PATH_BACKEND,
        hf_repo_results=envs.RESULTS_REPO,
        local_dir_results=envs.EVAL_RESULTS_PATH_BACKEND,
    )
    logging.info("Checked completed evals")

    eval_requests = manage_requests.get_eval_requests(
        job_status=current_pending_status,
        hf_repo=envs.QUEUE_REPO,
        local_dir=envs.EVAL_REQUESTS_PATH_BACKEND,
    )
    logging.info("Got eval requests")

    eval_requests = sort_queue.sort_models_by_priority(api=envs.API, models=eval_requests)
    logging.info("Sorted eval requests")

    print(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")
    if not eval_requests:
        print("No eval requests found. Exiting.")
        return

    for eval_request in eval_requests:
        pp.pprint(eval_request)
        try:
            run_eval_suite.run_evaluation(
                eval_request=eval_request,
                local_dir=envs.EVAL_RESULTS_PATH_BACKEND,
                results_repo=envs.RESULTS_REPO,
                batch_size=1,
                device=envs.DEVICE,
                no_cache=True,
                need_check=not args.publish,
                write_results=args.update,
            )
        except Exception:
            # One broken request must not abort the rest of the queue, and
            # must not stay PENDING (it would be retried first on every run).
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            logging.exception(
                "Eval failed for model %s, now setting status to failed",
                eval_request.model,
            )
            new_status = FAILED_STATUS
        else:
            logging.info(
                "Eval finished for model %s, now setting status to finished",
                eval_request.model,
            )
            new_status = FINISHED_STATUS

        # NOTE(review): run_evaluation's return value is ignored here, as in
        # the original; only a raised exception is treated as failure.
        manage_requests.set_eval_request(
            api=envs.API,
            eval_request=eval_request,
            new_status=new_status,
            hf_repo=envs.QUEUE_REPO,
            local_dir=envs.EVAL_REQUESTS_PATH_BACKEND,
        )


def _run_reproducibility_eval(args):
    """Evaluate the single model named on the command line."""
    if args.model is None:
        # Fail before any model loading rather than evaluating "None".
        raise ValueError("--model is required when --reproduce is set")

    eval_request = manage_requests.EvalRequest(
        model=args.model,
        status=PENDING_STATUS,
        precision=args.precision,
    )
    pp.pprint(eval_request)
    logging.info("Running reproducibility eval")

    run_eval_suite.run_evaluation(
        eval_request=eval_request,
        local_dir=envs.EVAL_RESULTS_PATH_BACKEND,
        results_repo=envs.RESULTS_REPO,
        batch_size=1,
        device=envs.DEVICE,
        need_check=not args.publish,
        write_results=args.update,
    )
    logging.info("Reproducibility eval finished")
def _str2bool(value):
    """Convert a command-line token to a bool.

    ``type=bool`` must not be used with argparse: it calls ``bool()`` on the
    raw string, so any non-empty value (including ``"False"``) is True.

    Args:
        value: The raw argument string, or an actual bool (argparse passes
            non-string defaults/consts through unchanged).

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in {"true", "t", "yes", "y", "1"}:
        return True
    # The empty string is kept as False, matching what bool("") used to give.
    if token in {"false", "f", "no", "n", "0", ""}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def _build_parser():
    """Build the argument parser for the auto-evaluation CLI."""
    parser = argparse.ArgumentParser(
        description="Run auto evaluation with optional reproducibility feature"
    )

    # Boolean flags accept an explicit value ("--publish False") or no value
    # at all ("--update"), in which case they are set to True.
    def add_flag(name, default, help_text):
        parser.add_argument(
            name, type=_str2bool, nargs="?", const=True, default=default, help=help_text
        )

    # Optional arguments
    add_flag("--reproduce", False, "Reproduce the evaluation results")
    parser.add_argument("--model", type=str, default=None, help="Your Model ID")
    parser.add_argument("--precision", type=str, default="float16", help="Precision of your model")
    add_flag("--publish", True, "whether directly publish the evaluation results on HF")
    add_flag("--update", False, "whether to update google drive files")
    return parser


def main():
    """Parse command-line arguments and run the auto-evaluation."""
    args = _build_parser().parse_args()
    run_auto_eval(args)
# Script entry point: run the CLI only when this file is executed directly.
if __name__ == "__main__":
    main()