leaderboard

Running on CPU Upgrade

qgallouedec HF staff committed on May 10

Commit

1b0277d

•

1 Parent(s): 0233854

fix model count

Files changed (3) hide show

app.py CHANGED Viewed

@@ -136,19 +136,21 @@ def get_leaderboard_df():
         try:
             with open(filename) as fp:
                 report = json.load(fp)
-            user_id, model_id = report["config"]["model_id"].split("/")
-            row = {"user_id": user_id, "model_id": model_id, "model_sha": report["config"]["model_sha"]}
             if report["status"] == "DONE" and len(report["results"]) > 0:
                 env_ids = list(report["results"].keys())
                 assert len(env_ids) == 1, "Only one environment supported for the moment"
                 row["env_id"] = env_ids[0]
                 row["iqm_episodic_return"] = iqm(report["results"][env_ids[0]]["episodic_returns"])
-            data.append(row)
         except Exception as e:
             logger.error(f"Error while processing {filename}: {e}")
     df = pd.DataFrame(data)  # create DataFrame
     df = df.fillna("")  # replace NaN values with empty strings
     return df
@@ -295,7 +297,6 @@ with gr.Blocks(css=css) as demo:
                 # Load the first video of the first environment
                 demo.load(refresh_one_video(df, env_ids[0]), outputs=[all_gr_videos[env_ids[0]]])
         with gr.TabItem("🚀 Getting my agent evaluated"):
             with open("texts/getting_my_agent_evaluated.md") as fp:
                 gr.Markdown(fp.read())

         try:
             with open(filename) as fp:
                 report = json.load(fp)
             if report["status"] == "DONE" and len(report["results"]) > 0:
+                user_id, model_id = report["config"]["model_id"].split("/")
+                row = {"user_id": user_id, "model_id": model_id, "model_sha": report["config"]["model_sha"]}
                 env_ids = list(report["results"].keys())
                 assert len(env_ids) == 1, "Only one environment supported for the moment"
                 row["env_id"] = env_ids[0]
                 row["iqm_episodic_return"] = iqm(report["results"][env_ids[0]]["episodic_returns"])
+                data.append(row)
         except Exception as e:
             logger.error(f"Error while processing {filename}: {e}")
     df = pd.DataFrame(data)  # create DataFrame
     df = df.fillna("")  # replace NaN values with empty strings
+    # Save to csv
+    df.to_csv("leaderboard.csv", index=False)
     return df
                 # Load the first video of the first environment
                 demo.load(refresh_one_video(df, env_ids[0]), outputs=[all_gr_videos[env_ids[0]]])
         with gr.TabItem("🚀 Getting my agent evaluated"):
             with open("texts/getting_my_agent_evaluated.md") as fp:
                 gr.Markdown(fp.read())

src/backend.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import json
 import os
 import re
 import tempfile
@@ -48,7 +49,7 @@ def _backend_routine():
     # Run an evaluation on the models
     with tempfile.TemporaryDirectory() as tmp_dir:
         commits = []
-        model_id, sha = pending_models[0]
         logger.info(f"Running evaluation on {model_id}")
         report = {"config": {"model_id": model_id, "model_sha": sha}}
         try:
@@ -84,5 +85,6 @@ def backend_routine():
     except Exception as e:
         logger.error(f"{e.__class__.__name__}: {str(e)}")
-if __name__=="__main__":
-    backend_routine()

 import json
 import os
+import random
 import re
 import tempfile
     # Run an evaluation on the models
     with tempfile.TemporaryDirectory() as tmp_dir:
         commits = []
+        model_id, sha = random.choice(pending_models)
         logger.info(f"Running evaluation on {model_id}")
         report = {"config": {"model_id": model_id, "model_sha": sha}}
         try:
     except Exception as e:
         logger.error(f"{e.__class__.__name__}: {str(e)}")
+if __name__ == "__main__":
+    backend_routine()

src/evaluation.py CHANGED Viewed

@@ -112,6 +112,7 @@ ALL_ENV_IDS = [
 NUM_EPISODES = 50
 class NoopResetEnv(gym.Wrapper[np.ndarray, int, np.ndarray, int]):
     """
     Sample initial states by taking random number of no-ops on reset.

 NUM_EPISODES = 50
 class NoopResetEnv(gym.Wrapper[np.ndarray, int, np.ndarray, int]):
     """
     Sample initial states by taking random number of no-ops on reset.