import json
import os
import re

import gradio as gr
import numpy as np
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

from src.backend import backend_routine
from src.logging import configure_root_logger, setup_logger

configure_root_logger()
logger = setup_logger(__name__)

API = HfApi(token=os.environ.get("TOKEN"))
RESULTS_REPO = "open-rl-leaderboard/results"

# Environments shown in the leaderboard, grouped by domain (one tab per domain,
# one sub-tab per environment). Atari entries are bare game names; the UI code
# below appends "NoFrameskip-v4" to build the id used in the results.
ALL_ENV_IDS = {
    "Atari": [
        "Adventure",
        "AirRaid",
        "Alien",
        "Amidar",
        "Assault",
        "Asterix",
        "Asteroids",
        "Atlantis",
        "BankHeist",
        "BattleZone",
        "BeamRider",
        "Berzerk",
        "Bowling",
        "Boxing",
        "Breakout",
        "Carnival",
        "Centipede",
        "ChopperCommand",
        "CrazyClimber",
        "Defender",
        "DemonAttack",
        "DoubleDunk",
        "ElevatorAction",
        "Enduro",
        "FishingDerby",
        "Freeway",
        "Frostbite",
        "Gopher",
        "Gravitar",
        "Hero",
        "IceHockey",
        "Jamesbond",
        "JourneyEscape",
        "Kangaroo",
        "Krull",
        "KungFuMaster",
        "MontezumaRevenge",
        "MsPacman",
        "NameThisGame",
        "Phoenix",
        "Pitfall",
        "Pong",
        "Pooyan",
        "PrivateEye",
        "Qbert",
        "Riverraid",
        "RoadRunner",
        "Robotank",
        "Seaquest",
        "Skiing",
        "Solaris",
        "SpaceInvaders",
        "StarGunner",
        "Tennis",
        "TimePilot",
        "Tutankham",
        "UpNDown",
        "Venture",
        "VideoPinball",
        "WizardOfWor",
        "YarsRevenge",
        "Zaxxon",
    ],
    "Box2D": [
        "LunarLander-v2",
        "LunarLanderContinuous-v2",
        "BipedalWalker-v3",
        "BipedalWalkerHardcore-v3",
        "CarRacing-v2",
    ],
    "Toy text": [
        "Blackjack-v1",
        "FrozenLake-v1",
        "FrozenLake8x8-v1",
        "CliffWalking-v0",
    ],
    "Classic control": [
        "Acrobot-v1",
        "CartPole-v1",
        "MountainCar-v0",
        "MountainCarContinuous-v0",
        "Pendulum-v1",
    ],
    "MuJoCo": [
        "Reacher-v4",
        "Pusher-v4",
        "InvertedPendulum-v4",
        "InvertedDoublePendulum-v4",
        "HalfCheetah-v4",
        "Hopper-v4",
        "Swimmer-v4",
        "Walker2d-v4",
        "Ant-v4",
        "Humanoid-v4",
        "HumanoidStandup-v4",
    ],
}

# Result files sit exactly two directories deep and are named
# "...results_<lowercase hex>.json".
_RESULTS_FILE_PATTERN = re.compile(r"^[^/]*/[^/]*/[^/]*results_[a-f0-9]+\.json$")

# Columns every leaderboard frame must expose, even when no result is available.
_LEADERBOARD_COLUMNS = ["user_id", "model_id", "env_id", "mean_episodic_return"]


def get_leaderboard_df() -> pd.DataFrame:
    """Download every result report from the results repo and tabulate them.

    Each report contributes one row. Reports whose status is not ``"DONE"`` or
    whose results are empty only carry ``user_id`` and ``model_id``; their
    remaining cells are empty strings.

    Returns:
        A DataFrame with the columns ``user_id``, ``model_id``, ``env_id`` and
        ``mean_episodic_return``. The columns are present even when no report
        (or no finished report) exists, so callers can always filter on
        ``env_id``.

    Raises:
        ValueError: If a finished report holds results for more than one
            environment, or if a ``model_id`` is not of the form
            ``"<user>/<model>"``.
    """
    # List all results files in results repo
    filenames = [
        filename
        for filename in API.list_repo_files(RESULTS_REPO, repo_type="dataset")
        if _RESULTS_FILE_PATTERN.match(filename)
    ]

    data = []
    for filename in filenames:
        path = API.hf_hub_download(repo_id=RESULTS_REPO, filename=filename, repo_type="dataset")
        with open(path, encoding="utf-8") as fp:
            report = json.load(fp)

        user_id, model_id = report["config"]["model_id"].split("/")
        row = {"user_id": user_id, "model_id": model_id}

        if report["status"] == "DONE" and len(report["results"]) > 0:
            env_ids = list(report["results"].keys())
            # Explicit raise instead of `assert`: asserts vanish under `python -O`.
            if len(env_ids) != 1:
                raise ValueError("Only one environment supported for the moment")
            row["env_id"] = env_ids[0]
            row["mean_episodic_return"] = np.mean(report["results"][env_ids[0]]["episodic_returns"])

        data.append(row)

    # Passing the column list guarantees the schema when `data` is empty or
    # when no report is finished; otherwise `df["env_id"]` raises KeyError.
    df = pd.DataFrame(data, columns=_LEADERBOARD_COLUMNS)
    df = df.fillna("")  # replace NaN values with empty strings
    return df


TITLE = """
🚀 Open RL Leaderboard
"""

INTRODUCTION_TEXT = """
Welcome to the Open RL Leaderboard! This is a community-driven benchmark for reinforcement learning models.
"""

ABOUT_TEXT = """
The Open RL Leaderboard is a community-driven benchmark for reinforcement learning models.
"""


def select_env(df: pd.DataFrame, env_id: str) -> pd.DataFrame:
    """Return the rows of ``df`` for ``env_id``, best first, with a ranking.

    Args:
        df: Leaderboard frame with at least the columns ``env_id`` and
            ``mean_episodic_return``.
        env_id: Environment id to keep.

    Returns:
        A new DataFrame sorted by ``mean_episodic_return`` in descending order
        with an extra ``ranking`` column numbered from 1. ``df`` is not
        modified.
    """
    selected = df[df["env_id"] == env_id].sort_values("mean_episodic_return", ascending=False)
    # `assign` returns a fresh frame, so no column is ever set on a filtered slice.
    return selected.assign(ranking=np.arange(1, len(selected) + 1))


def format_df(df: pd.DataFrame) -> list:
    """Convert a ranked leaderboard frame into rows for a Gradio Dataframe.

    The user and model cells are turned into markdown links to huggingface.co.

    Args:
        df: Frame with the columns ``ranking``, ``user_id``, ``model_id`` and
            ``mean_episodic_return``.

    Returns:
        A list of ``[ranking, user link, model link, mean_episodic_return]``
        rows, in the order of ``df``. ``df`` is not modified.
    """
    user_ids = df["user_id"].tolist()
    model_ids = df["model_id"].tolist()

    # Add hyperlinks
    formatted = df.copy()
    formatted["user_id"] = [f"[{user_id}](https://huggingface.co/{user_id})" for user_id in user_ids]
    formatted["model_id"] = [
        f"[{model_id}](https://huggingface.co/{user_id}/{model_id})"
        for user_id, model_id in zip(user_ids, model_ids)
    ]

    # Keep only the relevant columns
    formatted = formatted[["ranking", "user_id", "model_id", "mean_episodic_return"]]
    return formatted.values.tolist()


def _download_replay(user_id: str, model_id: str):
    """Return the local path of the model's ``replay.mp4``, or None on failure."""
    try:
        return API.hf_hub_download(
            repo_id=f"{user_id}/{model_id}",
            filename="replay.mp4",
            revision="main",
            repo_type="model",
        )
    except Exception:
        # The replay is optional decoration: a model without one (or a
        # transient hub error) must not prevent the whole app from starting.
        logger.warning("Could not fetch replay.mp4 for %s/%s", user_id, model_id, exc_info=True)
        return None


with gr.Blocks() as demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Leaderboard"):
            df = get_leaderboard_df()
            for env_domain, env_ids in ALL_ENV_IDS.items():
                with gr.TabItem(env_domain):
                    for env_id in env_ids:
                        with gr.TabItem(env_id):
                            with gr.Row(equal_height=False):
                                # Atari tabs show the bare game name; results use the full id.
                                full_env_id = f"{env_id}NoFrameskip-v4" if env_domain == "Atari" else env_id
                                env_df = select_env(df, full_env_id)
                                gr.components.Dataframe(
                                    value=format_df(env_df),
                                    headers=["🏆 Ranking", "🧑 User", "🤖 Model id", "📊 Mean episodic return"],
                                    datatype=["number", "markdown", "markdown", "number"],
                                    row_count=(10, "fixed"),
                                    scale=3,
                                )

                                # Show a replay of the best-ranked model, if there is one.
                                if not env_df.empty:
                                    user_id = env_df.iloc[0]["user_id"]
                                    model_id = env_df.iloc[0]["model_id"]
                                    video_path = _download_replay(user_id, model_id)
                                    if video_path is not None:
                                        gr.PlayableVideo(
                                            video_path,
                                            autoplay=True,
                                            scale=1,
                                            min_width=50,
                                            show_download_button=False,
                                            label=model_id,
                                        )
                                        # Doesn't loop for the moment, see https://github.com/gradio-app/gradio/issues/7689

        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(ABOUT_TEXT)


# Run the backend routine every 10 minutes, never more than one instance at a time.
scheduler = BackgroundScheduler()
scheduler.add_job(func=backend_routine, trigger="interval", seconds=10 * 60, max_instances=1)
# Start the background scheduler at import time so its jobs run however the
# module is loaded.
scheduler.start()


if __name__ == "__main__":
    # Enable request queuing, then serve the Gradio app.
    queued_demo = demo.queue()
    queued_demo.launch()