Spaces:

cot-leaderboard
/

open-cot-dashboard

Running

Gregor Betz committed on May 19

Commit

ed93a1c

•

1 Parent(s): c58d788

logs

Files changed (1) hide show

backend/data.py CHANGED Viewed

@@ -18,12 +18,13 @@ def load_cot_data():
     ####
     # download raw data
     snapshot_download(
         repo_id=EVAL_DATASET,
         revision="main",
         local_dir=EVAL_RESULTS_PATH,
         repo_type="dataset",
-        max_workers=60,
         token=TOKEN
     )
@@ -86,7 +87,7 @@ def load_cot_data():
     df_cot_avg["task"] = "all"
     # add average results to cot df
-    df_cot = pd.concat([df_cot_avg, df_cot], ignore_index=True)
     ####
@@ -94,7 +95,8 @@ def load_cot_data():
     ####
     # load traces data and extract configs
-    dataset = datasets.load_dataset(TRACES_DATASET, split="test", token=TOKEN)
     dataset = dataset.select_columns(["config_data"])
     df_cottraces = pd.DataFrame({"config_data": dataset["config_data"]})
     del dataset
@@ -126,6 +128,9 @@ def load_cot_data():
     for col in ['acc_base', 'acc_cot', 'acc_gain']:
         df_cot[col] = 100 * df_cot[col]
     ####
     # Create error dataframe
     ####
@@ -136,4 +141,8 @@ def load_cot_data():
     df_cot_err.reset_index(inplace=True)
     df_cot_err.rename(columns={"acc_base-mean": "base accuracy", "acc_cot-mean": "cot accuracy", "acc_gain-mean": "marginal acc. gain"}, inplace=True)
     return df_cot_err, df_cot

     ####
     # download raw data
+    print("Downloading evaluation results...")
     snapshot_download(
         repo_id=EVAL_DATASET,
         revision="main",
         local_dir=EVAL_RESULTS_PATH,
         repo_type="dataset",
+        max_workers=8,
         token=TOKEN
     )
     df_cot_avg["task"] = "all"
     # add average results to cot df
+    df_cot = pd.concat([df_cot_avg, df_cot], ignore_index=True)
     ####
     ####
     # load traces data and extract configs
+    print("Loading traces data...")
+    dataset = datasets.load_dataset(TRACES_DATASET, split="test", token=TOKEN, num_proc=8)
     dataset = dataset.select_columns(["config_data"])
     df_cottraces = pd.DataFrame({"config_data": dataset["config_data"]})
     del dataset
     for col in ['acc_base', 'acc_cot', 'acc_gain']:
         df_cot[col] = 100 * df_cot[col]
+    print("Regimes dataframe created:")
+    print(df_cot.head(3))
     ####
     # Create error dataframe
     ####
     df_cot_err.reset_index(inplace=True)
     df_cot_err.rename(columns={"acc_base-mean": "base accuracy", "acc_cot-mean": "cot accuracy", "acc_gain-mean": "marginal acc. gain"}, inplace=True)
+    print("Error dataframe created:")
+    print(df_cot_err.head(3))
     return df_cot_err, df_cot