import glob
import sys
from typing import Optional

import pandas as pd
from huggingface_hub import hf_hub_download, upload_file
from huggingface_hub.utils import EntryNotFoundError


sys.path.append(".")
from utils import BASE_PATH, FINAL_CSV_FILE, GITHUB_SHA, REPO_ID, collate_csv  # noqa: E402


# Numeric columns that are left out of the percentage-change report.
_EXCLUDED_COLUMNS = ("batch_size", "num_inference_steps", "actual_gpu_memory (gbs)")


def has_previous_benchmark() -> Optional[str]:
    """Fetch the previously uploaded benchmark CSV, if one exists.

    Returns:
        The local path returned by `hf_hub_download` for `FINAL_CSV_FILE` in
        the `REPO_ID` dataset repo, or `None` when the Hub reports that the
        file does not exist (`EntryNotFoundError`).
    """
    try:
        return hf_hub_download(repo_id=REPO_ID, repo_type="dataset", filename=FINAL_CSV_FILE)
    except EntryNotFoundError:
        return None


def filter_float(value):
    """Reduce an annotated cell such as `"1.23 (+4.56%)"` to its number.

    Args:
        value: A cell value. Strings are split on whitespace and their first
            token is converted with `float`; anything else is passed through.

    Returns:
        The leading number as a `float` for string input, otherwise `value`
        unchanged.

    Raises:
        ValueError: If the first token of a string is not a valid float.
        IndexError: If the string is empty or contains only whitespace.
    """
    if isinstance(value, str):
        return float(value.split()[0])
    return value


def _format_percent_change(change) -> str:
    """Render a percentage change as a ` (+x.xx%)` suffix ("" for NaN)."""
    # NaN means there was nothing to compare against (e.g. a newly added
    # row), so no suffix is appended at all.
    if pd.isna(change):
        return ""
    sign = "+" if change > 0 else ""
    return f" ({sign}{change:.2f}%)"


def push_to_hf_dataset():
    """Collate the benchmark CSVs and upload the result to the Hub dataset.

    All `*.csv` files under `BASE_PATH` are passed to `collate_csv` together
    with `FINAL_CSV_FILE`. If an earlier `FINAL_CSV_FILE` can be downloaded
    from the dataset repo, every compared numeric column is rewritten as
    `"<value> (<signed percent change>%)"` and the local file is overwritten
    before `FINAL_CSV_FILE` is uploaded to `REPO_ID`.
    """
    all_csvs = sorted(glob.glob(f"{BASE_PATH}/*.csv"))
    collate_csv(all_csvs, FINAL_CSV_FILE)

    # If there's an existing benchmark file, we should report the changes.
    csv_path = has_previous_benchmark()
    if csv_path is not None:
        current_results = pd.read_csv(FINAL_CSV_FILE)
        previous_results = pd.read_csv(csv_path)

        numeric_columns = current_results.select_dtypes(include=["float64", "int64"]).columns
        for column in numeric_columns:
            if column in _EXCLUDED_COLUMNS:
                continue
            # A metric that did not exist in the previous run has nothing to
            # be compared with; keep its raw values instead of failing.
            if column not in previous_results.columns:
                continue

            # Previously uploaded cells may already carry a " (+x.xx%)"
            # suffix, so strip it before converting to float.
            previous = previous_results[column].map(filter_float).astype(float)
            current = current_results[column].astype(float)

            # NOTE(review): rows are matched on the default integer index, so
            # this assumes both files list the benchmarks in the same order
            # -- TODO confirm.
            percent_change = ((current - previous) / previous) * 100

            # Append the signed percentage change to the original values.
            current_results[column] = current.map(str) + percent_change.map(_format_percent_change)

        # Overwrite the current result file.
        current_results.to_csv(FINAL_CSV_FILE, index=False)

    commit_message = f"upload from sha: {GITHUB_SHA}" if GITHUB_SHA is not None else "upload benchmark results"
    upload_file(
        repo_id=REPO_ID,
        path_in_repo=FINAL_CSV_FILE,
        path_or_fileobj=FINAL_CSV_FILE,
        repo_type="dataset",
        commit_message=commit_message,
    )


if __name__ == "__main__":
    push_to_hf_dataset()