adeol20 committed on
Commit
58917bf
1 Parent(s): 67cb5cf

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +23 -2
README.md CHANGED
@@ -30,8 +30,29 @@ TODO: Add your code
30
 
31
 
32
  ```python
33
- from stable_baselines3 import ...
34
- from huggingface_sb3 import load_from_hub
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  ...
37
  ```
 
30
 
31
 
32
  ```python
 
 
33
 
34
+ from stable_baselines3 import PPO
35
+ from huggingface_sb3 import load_from_hub, package_to_hub
36
+ from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
37
+
38
+ env_id = "PandaReachDense-v3"
39
+ env = gym.make(env_id)
40
+ env = make_vec_env(env_id, n_envs=4)
41
+ env = VecNormalize(env, training=True, norm_obs=True, norm_reward=True, gamma=0.5, epsilon=1e-10, norm_obs_keys=None)
42
+
43
+ model = PPO("MultiInputPolicy", env, verbose=1)
44
+ model.learn(1_000_000)
45
+
46
+ eval_env = DummyVecEnv([lambda: gym.make("PandaReachDense-v3")])
47
+ eval_env = VecNormalize.load("vec_normalize.pkl", eval_env)
48
+ eval_env.render_mode = "rgb_array"
49
+ eval_env.training = False
50
+ # reward normalization is not needed at test time
51
+ eval_env.norm_reward = False
52
+
53
+
54
+ model = PPO.load("Slay-PandaReachDense-v3")
55
+ mean_reward, std_reward = evaluate_policy(model, eval_env)
56
+ print(f"Mean reward = {mean_reward:.2f} +/- {std_reward:.2f}")
57
  ...
58
  ```