Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import json | |
import os | |
import re | |
import gradio as gr | |
import numpy as np | |
import pandas as pd | |
from apscheduler.schedulers.background import BackgroundScheduler | |
from huggingface_hub import HfApi | |
from src.backend import backend_routine | |
from src.logging import configure_root_logger, setup_logger | |
# Logging setup, using the helpers imported from src.logging.
configure_root_logger()
logger = setup_logger(__name__)

# Hub client; the access token is read from the TOKEN environment variable
# (os.environ.get returns None when it is unset).
API = HfApi(token=os.environ.get("TOKEN"))

# Dataset repository that the result reports are listed and downloaded from.
RESULTS_REPO = "open-rl-leaderboard/results"
# Environment ids shown on the leaderboard, grouped by domain.
# The UI code iterates this mapping to create one tab per domain and one sub-tab per id.
# Atari entries are stored without a suffix; the UI code appends "NoFrameskip-v4" to them.
ALL_ENV_IDS = {
    "Atari": [
        "Adventure", "AirRaid", "Alien", "Amidar", "Assault", "Asterix",
        "Asteroids", "Atlantis", "BankHeist", "BattleZone", "BeamRider", "Berzerk",
        "Bowling", "Boxing", "Breakout", "Carnival", "Centipede", "ChopperCommand",
        "CrazyClimber", "Defender", "DemonAttack", "DoubleDunk", "ElevatorAction", "Enduro",
        "FishingDerby", "Freeway", "Frostbite", "Gopher", "Gravitar", "Hero",
        "IceHockey", "Jamesbond", "JourneyEscape", "Kangaroo", "Krull", "KungFuMaster",
        "MontezumaRevenge", "MsPacman", "NameThisGame", "Phoenix", "Pitfall", "Pong",
        "Pooyan", "PrivateEye", "Qbert", "Riverraid", "RoadRunner", "Robotank",
        "Seaquest", "Skiing", "Solaris", "SpaceInvaders", "StarGunner", "Tennis",
        "TimePilot", "Tutankham", "UpNDown", "Venture", "VideoPinball", "WizardOfWor",
        "YarsRevenge", "Zaxxon",
    ],
    "Box2D": [
        "LunarLander-v2", "LunarLanderContinuous-v2",
        "BipedalWalker-v3", "BipedalWalkerHardcore-v3",
        "CarRacing-v2",
    ],
    "Toy text": ["Blackjack-v1", "FrozenLake-v1", "FrozenLake8x8-v1", "CliffWalking-v0"],
    "Classic control": [
        "Acrobot-v1", "CartPole-v1",
        "MountainCar-v0", "MountainCarContinuous-v0",
        "Pendulum-v1",
    ],
    "MuJoCo": [
        "Reacher-v4", "Pusher-v4",
        "InvertedPendulum-v4", "InvertedDoublePendulum-v4",
        "HalfCheetah-v4", "Hopper-v4", "Swimmer-v4", "Walker2d-v4", "Ant-v4",
        "Humanoid-v4", "HumanoidStandup-v4",
    ],
}
def get_leaderboard_df():
    """Build the leaderboard table from the report files stored in the results repo.

    Lists the files of the ``RESULTS_REPO`` dataset, keeps the paths that match
    ``<dir>/<dir>/<prefix>results_<hex>.json``, downloads each one and turns it into a row.

    Returns:
        A DataFrame with the columns ``user_id``, ``model_id``, ``env_id`` and
        ``mean_episodic_return``. A report whose status is not ``"DONE"`` or whose
        results are empty still produces a row; its last two columns hold ``""``.
        The four columns exist even when no report file is found, so callers can
        filter on ``env_id`` unconditionally.

    Raises:
        ValueError: If a finished report holds results for more than one environment,
            or if its ``model_id`` is not of the form ``"<user>/<model>"``.
    """
    columns = ["user_id", "model_id", "env_id", "mean_episodic_return"]

    # List all results files in results repo
    pattern = re.compile(r"^[^/]*/[^/]*/[^/]*results_[a-f0-9]+\.json$")
    filenames = [
        filename
        for filename in API.list_repo_files(RESULTS_REPO, repo_type="dataset")
        if pattern.match(filename)
    ]

    data = []
    for filename in filenames:
        path = API.hf_hub_download(repo_id=RESULTS_REPO, filename=filename, repo_type="dataset")
        # JSON is UTF-8 by specification; do not depend on the locale's default encoding.
        with open(path, encoding="utf-8") as fp:
            report = json.load(fp)

        user_id, model_id = report["config"]["model_id"].split("/")
        row = {"user_id": user_id, "model_id": model_id}
        if report["status"] == "DONE" and len(report["results"]) > 0:
            env_ids = list(report["results"])
            # Explicit raise instead of assert: asserts are stripped under ``python -O``.
            if len(env_ids) != 1:
                raise ValueError("Only one environment supported for the moment")
            env_id = env_ids[0]
            row["env_id"] = env_id
            row["mean_episodic_return"] = np.mean(report["results"][env_id]["episodic_returns"])
        data.append(row)

    # Passing ``columns`` guarantees the schema even for an empty list of rows.
    df = pd.DataFrame(data, columns=columns)
    # Rows of unfinished reports have no env_id / return: replace NaN with empty strings.
    return df.fillna("")
# Page title, passed to gr.HTML at the top of the interface.
TITLE = """
🚀 Open RL Leaderboard
"""

# Introduction, rendered as Markdown right after the title.
INTRODUCTION_TEXT = """
Welcome to the Open RL Leaderboard! This is a community-driven benchmark for reinforcement learning models.
"""

# Content of the "About" tab, rendered as Markdown.
ABOUT_TEXT = """
The Open RL Leaderboard is a community-driven benchmark for reinforcement learning models.
"""
def select_env(df: pd.DataFrame, env_id: str):
    """Return the rows of ``df`` for one environment, best return first, with a rank.

    The result is a new DataFrame: the rows whose ``env_id`` equals the given id,
    ordered by decreasing ``mean_episodic_return``, with a ``ranking`` column that
    counts from 1 in that order. The input DataFrame is left unchanged.
    """
    matching = df.loc[df["env_id"] == env_id]
    ranked = matching.sort_values(by="mean_episodic_return", ascending=False)
    return ranked.assign(ranking=np.arange(1, len(ranked) + 1))
def format_df(df: pd.DataFrame) -> list:
    """Turn a ranked leaderboard table into rows ready for a Gradio Dataframe.

    Args:
        df: Table with at least the columns ``ranking``, ``user_id``, ``model_id``
            and ``mean_episodic_return``. It is not modified.

    Returns:
        A list of ``[ranking, user link, model link, mean_episodic_return]`` rows, where
        the user and model ids are replaced by Markdown links to their Hugging Face pages.
    """
    df = df.copy()

    # Add hyperlinks. The columns are built positionally rather than with per-row
    # ``df.loc[index, ...]`` writes, which would overwrite every row sharing an index
    # label when the index is not unique.
    user_ids = df["user_id"].tolist()
    model_ids = df["model_id"].tolist()
    df["user_id"] = [f"[{user_id}](https://huggingface.co/{user_id})" for user_id in user_ids]
    df["model_id"] = [
        f"[{model_id}](https://huggingface.co/{user_id}/{model_id})"
        for user_id, model_id in zip(user_ids, model_ids)
    ]

    # Keep only the relevant columns
    df = df[["ranking", "user_id", "model_id", "mean_episodic_return"]]
    return df.values.tolist()
# Build the Gradio interface: a "Leaderboard" tab holding one tab per environment domain
# and one sub-tab per environment, followed by an "About" tab.
with gr.Blocks() as demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Leaderboard"):
            # The results are fetched once, while the interface is being built.
            df = get_leaderboard_df()
            for env_domain, env_ids in ALL_ENV_IDS.items():
                with gr.TabItem(env_domain):
                    for env_id in env_ids:
                        with gr.TabItem(env_id):
                            with gr.Row(equal_height=False):
                                # Atari ids are listed without suffix; the tab keeps the short
                                # name while the results are looked up under the full id.
                                if env_domain == "Atari":
                                    full_env_id = f"{env_id}NoFrameskip-v4"
                                else:
                                    full_env_id = env_id
                                env_df = select_env(df, full_env_id)
                                gr.components.Dataframe(
                                    value=format_df(env_df),
                                    headers=["🏆 Ranking", "🧑 User", "🤖 Model id", "📊 Mean episodic return"],
                                    datatype=["number", "markdown", "markdown", "number"],
                                    row_count=(10, "fixed"),
                                    scale=3,
                                )

                                # Show the replay of the best-ranked model next to the table.
                                if not env_df.empty:
                                    user_id = env_df.iloc[0]["user_id"]
                                    model_id = env_df.iloc[0]["model_id"]
                                    try:
                                        video_path = API.hf_hub_download(
                                            repo_id=f"{user_id}/{model_id}",
                                            filename="replay.mp4",
                                            revision="main",
                                            repo_type="model",
                                        )
                                    except Exception:
                                        # UI-construction boundary: a missing or unreachable replay
                                        # must not prevent the leaderboard from starting, so the
                                        # error is logged and the video is simply left out.
                                        logger.exception(
                                            "Could not download replay.mp4 for %s/%s", user_id, model_id
                                        )
                                    else:
                                        # Doesn't loop for the moment, see https://github.com/gradio-app/gradio/issues/7689
                                        gr.PlayableVideo(
                                            video_path,
                                            autoplay=True,
                                            scale=1,
                                            min_width=50,
                                            show_download_button=False,
                                            label=model_id,
                                        )

        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(ABOUT_TEXT)
# Register backend_routine as an interval job firing every 10 * 60 seconds (10 minutes)
# and start the background scheduler.
# NOTE(review): max_instances=1 presumably keeps two runs of the routine from overlapping;
# confirm against the APScheduler documentation.
scheduler = BackgroundScheduler()
scheduler.add_job(func=backend_routine, trigger="interval", seconds=10 * 60, max_instances=1)
scheduler.start()

# Launch the app, with the request queue enabled, only when this file is run as a script.
if __name__ == "__main__":
    demo.queue().launch()