Spaces:

ThomasSimonini
/

Check-my-progress-Deep-RL-Course

Running

File size: 8,473 Bytes

import gradio as gr
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.repocard import metadata_load

import pandas as pd

import requests

from utils import *

# Module-level Hugging Face Hub client. The helper functions visible in this
# file each create their own local HfApi instance, so none of them reads this one.
api = HfApi()

def get_user_models(hf_username, env_tag, lib_tag):
    """
    Return the ids of a user's Reinforcement Learning models on the Hub.

    The Hub is queried with the user as author and with the
    "reinforcement-learning", environment and library tags as filter.
    :param hf_username: User HF username
    :param env_tag: Environment tag
    :param lib_tag: Library tag
    :return: list with the `modelId` of every model returned by the query
    """
    wanted_tags = ["reinforcement-learning", env_tag, lib_tag]
    hub_client = HfApi()
    found = hub_client.list_models(author=hf_username, filter=wanted_tags)
    return [model_info.modelId for model_info in found]


def get_user_sf_models(hf_username, env_tag, lib_tag):
    """
    List the user's models for a library whose model card names `env_tag`
    as evaluation dataset.

    The Hub query filters on "reinforcement-learning" and the library tag only;
    the environment is matched against
    meta["model-index"][0]["results"][0]["dataset"]["name"] read from each
    model's README metadata.
    :param hf_username: User HF username
    :param env_tag: Environment name expected in the model card
    :param lib_tag: Library tag
    :return: list of model ids whose model card matches `env_tag`
    """
    api = HfApi()
    models = api.list_models(author=hf_username, filter=["reinforcement-learning", lib_tag])
    user_model_ids = [x.modelId for x in models]

    models_sf = []
    for model in user_model_ids:
        meta = get_metadata(model)
        if meta is None:
            continue
        try:
            dataset_name = meta["model-index"][0]["results"][0]["dataset"]["name"]
        except (KeyError, IndexError, TypeError):
            # The model card has no usable evaluation entry, so it cannot match
            # env_tag. Skip it instead of aborting the whole listing.
            continue
        if dataset_name == env_tag:
            models_sf.append(model)

    return models_sf


def get_metadata(model_id):
  """
  Download the model's README.md and load its metadata
  (the metadata contains the evaluation data).
  :param model_id: id of the model on the Hub
  :return: whatever `metadata_load` returns for the README, or None when the
           download raises an HTTP error
  """
  try:
    return metadata_load(hf_hub_download(model_id, filename="README.md"))
  except requests.exceptions.HTTPError:
    # e.g. 404: the repository has no README.md
    return None


def parse_metrics_accuracy(meta):
  """
  Extract the first reported metric value from the model metadata.

  The value is read from meta["model-index"][0]["results"][0]["metrics"][0]["value"].
  :param meta: model metadata (mapping loaded from the model card)
  :return: the metric value, or None when the metadata has no "model-index"
           or that path is incomplete (empty lists, missing keys, null entries)
  """
  if "model-index" not in meta:
    return None
  try:
    return meta["model-index"][0]["results"][0]["metrics"][0]["value"]
  except (KeyError, IndexError, TypeError):
    # Malformed or partially filled model card: treat it like a card without results.
    return None


def parse_rewards(accuracy):
  """
  Parse mean_reward and std_reward out of a reported metric value.

  Accepted forms are "<mean> +/- <std>" (spacing around "+/-" is optional)
  and a bare "<mean>", in which case std_reward is 0. Numbers are accepted
  as well because the value is converted with str() first.
  :param accuracy: model results (string, number or None)
  :return: (mean_reward, std_reward) as floats. When `accuracy` is None or
           cannot be parsed as numbers, both are the placeholder -1000.0, so
           `mean_reward - std_reward` evaluates to 0 for such a model.
  """
  default_reward = -1000.0
  default_std = -1000.0
  if accuracy is None:
    return default_reward, default_std

  parts = str(accuracy).split("+/-")
  try:
    # float() ignores surrounding whitespace, so "200 +/- 10" and "200+/-10" both work.
    mean_reward = float(parts[0])
    std_reward = float(parts[1]) if len(parts) > 1 else 0.0
  except ValueError:
    # Empty or non-numeric metric: handle it like a missing metric instead of crashing.
    return default_reward, default_std

  return mean_reward, std_reward

def calculate_best_result(user_model_ids):
  """
  Find the best score among the given models.

  A model's score is mean_reward - std_reward, taken from its model card.
  Models for which no metadata could be loaded are ignored.
  :param user_model_ids: RL models of a user
  :return: (best score, id of the model that reached it); (-1000, "") when
           no model scores above -1000
  """
  top_score, top_model_id = -1000, ""
  for model_id in user_model_ids:
    metadata = get_metadata(model_id)
    if metadata is not None:
      mean_reward, std_reward = parse_rewards(parse_metrics_accuracy(metadata))
      score = mean_reward - std_reward
      if score > top_score:
        top_score, top_model_id = score, model_id

  return top_score, top_model_id

def check_if_passed(model):
  """
  Mark a unit as passed when its best result reaches the baseline.

  Sets model["passed_"] to True in place when
  model["best_result"] >= model["min_result"]; otherwise the entry is left
  untouched. Nothing is returned.
  :param model: user model
  """
  reached_baseline = model["best_result"] >= model["min_result"]
  if reached_baseline:
    model["passed_"] = True

def _certification_units():
  """
  Build a fresh list with one entry per hands-on to check.

  Each entry holds the unit name, the environment, the library tag and the
  baseline (`min_result`), plus the result fields that `certification` fills in.
  """
  units = [
      # (unit, env, library, min_result)
      ("Unit 1", "LunarLander-v2", "stable-baselines3", 200),
      ("Unit 2", "Taxi-v3", "q-learning", 4),
      ("Unit 3", "SpaceInvadersNoFrameskip-v4", "stable-baselines3", 200),
      ("Unit 4", "CartPole-v1", "reinforce", 350),
      ("Unit 4", "Pixelcopter-PLE-v0", "reinforce", 5),
      ("Unit 5", "ML-Agents-SnowballTarget", "ml-agents", -100),
      ("Unit 5", "ML-Agents-Pyramids", "ml-agents", -100),
      ("Unit 6", "PandaReachDense", "stable-baselines3", -3.5),
      ("Unit 7", "ML-Agents-SoccerTwos", "ml-agents", -100),
      ("Unit 8 PI", "LunarLander-v2", "deep-rl-course", -500),
      ("Unit 8 PII", "doom_health_gathering_supreme", "sample-factory", 5),
  ]
  return [
      {
          "unit": unit,
          "env": env,
          "library": library,
          "min_result": min_result,
          "best_result": 0,
          "best_model_id": "",
          "passed_": False,
      }
      for unit, env, library, min_result in units
  ]


def certification(hf_username):
  """
  Check a user's progress on every hands-on of the course.

  For each unit the user's matching models are listed, the best result
  (mean_reward - std_reward) is computed and compared with the unit baseline.
  :param hf_username: User HF username. Surrounding whitespace is ignored; a
                      blank or None username is reported like a user without
                      any model, without querying the Hub.
  :return: pandas DataFrame with the columns 'passed', 'unit', 'env',
           'min_result', 'best_result' and 'best_model_id', one row per hands-on
  """
  # Only strings are normalised; any other value is forwarded unchanged.
  if isinstance(hf_username, str):
    hf_username = hf_username.strip()
  # A blank author is not a per-user filter (depending on the huggingface_hub
  # version it may be ignored entirely - TODO confirm), so skip the lookups.
  skip_lookup = hf_username is None or (isinstance(hf_username, str) and not hf_username)

  results_certification = _certification_units()

  for unit in results_certification:
    if skip_lookup:
      user_models = []
    elif unit["unit"] == "Unit 6":
      # Since Unit 6 can use PandaReachDense-v2 or v3
      user_models = get_user_models(hf_username, "PandaReachDense-v3", unit["library"])
      if not user_models:
        user_models = get_user_models(hf_username, "PandaReachDense-v2", unit["library"])
    elif unit["unit"] == "Unit 8 PII":
      # For sample factory vizdoom we don't have env tag for now
      user_models = get_user_sf_models(hf_username, unit['env'], unit['library'])
    else:
      # Get user model
      user_models = get_user_models(hf_username, unit['env'], unit['library'])

    # Calculate the best result and get the best_model_id
    best_result, best_model_id = calculate_best_result(user_models)

    # Save best_result and best_model_id
    unit["best_result"] = best_result
    unit["best_model_id"] = make_clickable_model(best_model_id)

    # Based on best_result do we pass the unit?
    check_if_passed(unit)
    unit["passed"] = pass_emoji(unit["passed_"])

  print(results_certification)

  df = pd.DataFrame(results_certification)
  df = df[['passed', 'unit', 'env', 'min_result', 'best_result', 'best_model_id']]
  return df


# Gradio UI: a username textbox, a button and the results table.
with gr.Blocks() as demo:
    # Plain string: the text has no placeholders, so no f-string is needed.
    gr.Markdown("""
    # 🏆 Check your progress in the Deep Reinforcement Learning Course 🏆
    You can check your progress here.
    
    - To get a certificate of completion, you must **pass 80% of the assignments**.
    - To get an honors certificate, you must **pass 100% of the assignments**.

    There's **no deadlines, the course is self-paced**.

    To pass an assignment your model result (mean_reward - std_reward) must be >= min_result

    **When min_result = -100 it means that you just need to push a model to pass this hands-on. No need to reach a certain result.**
    
    Just type your Hugging Face Username 🤗 (in my case ThomasSimonini)
    """)
    
    hf_username = gr.Textbox(placeholder="ThomasSimonini", label="Your Hugging Face Username")
    #email = gr.Textbox(placeholder="[email protected]", label="Your Email (to receive your certificate)")
    check_progress_button = gr.Button(value="Check my progress")
    # No initial value: the table is filled only when the button is clicked.
    # Computing it here would run the whole Hub lookup at build time with the
    # Textbox component object instead of a username.
    # One datatype per column returned by certification (6 columns).
    output = gr.components.Dataframe(
        headers=["Pass?", "Unit", "Environment", "Baseline", "Your best result", "Your best model id"],
        datatype=["markdown", "markdown", "markdown", "number", "number", "markdown"],
    )
    check_progress_button.click(fn=certification, inputs=hf_username, outputs=output)

demo.launch()