Quentin Gallouédec committed on
Commit
de52ad3
1 Parent(s): 2339ca6

log raw results

Browse files
Files changed (2) hide show
  1. app.py +2 -1
  2. src/evaluation.py +3 -5
app.py CHANGED
@@ -4,6 +4,7 @@ import os
4
  import pprint
5
 
6
  import gradio as gr
 
7
  import pandas as pd
8
  from apscheduler.schedulers.background import BackgroundScheduler
9
  from huggingface_hub import snapshot_download
@@ -119,7 +120,7 @@ def get_leaderboard_df():
119
  model_id = report["config"]["model_id"]
120
  row = {"Agent": model_id, "Status": report["status"]}
121
  if report["status"] == "DONE":
122
- results = {env_id: result["episodic_return_mean"] for env_id, result in report["results"].items()}
123
  row.update(results)
124
  data.append(row)
125
 
 
4
  import pprint
5
 
6
  import gradio as gr
7
+ import numpy as np
8
  import pandas as pd
9
  from apscheduler.schedulers.background import BackgroundScheduler
10
  from huggingface_hub import snapshot_download
 
120
  model_id = report["config"]["model_id"]
121
  row = {"Agent": model_id, "Status": report["status"]}
122
  if report["status"] == "DONE":
123
+ results = {env_id: np.mean(result["episodic_return"]) for env_id, result in report["results"].items()}
124
  row.update(results)
125
  data.append(row)
126
 
src/evaluation.py CHANGED
@@ -268,10 +268,8 @@ def evaluate(model_id, revision):
268
  for info in infos["final_info"]:
269
  if info is None or "episode" not in info:
270
  continue
271
- episodic_returns.append(info["episode"]["r"])
272
 
273
- mean_reward = float(np.mean(episodic_returns))
274
- std_reward = float(np.std(episodic_returns))
275
- results[env_id] = {"episodic_return_mean": mean_reward, "episodic_reward_std": std_reward}
276
- logger.info(f"Environment {env_id}: {mean_reward} ± {std_reward}")
277
  return results
 
268
  for info in infos["final_info"]:
269
  if info is None or "episode" not in info:
270
  continue
271
+ episodic_returns.append(float(info["episode"]["r"]))
272
 
273
+ results[env_id] = {"episodic_returns": episodic_returns}
274
+ logger.info(f"Environment {env_id}: {np.mean(episodic_returns)} ± {np.mean(episodic_returns)}")
 
 
275
  return results