{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "39edb4de", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T13:47:41.568423Z", "iopub.status.busy": "2024-05-14T13:47:41.568107Z", "iopub.status.idle": "2024-05-14T13:49:04.589038Z", "shell.execute_reply": "2024-05-14T13:49:04.587937Z" }, "papermill": { "duration": 83.029054, "end_time": "2024-05-14T13:49:04.591619", "exception": false, "start_time": "2024-05-14T13:47:41.562565", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "%%capture\n", "!pip install stable-baselines3[extra]\n", "!pip install moviepy" ] }, { "cell_type": "code", "execution_count": 2, "id": "a05c23fe", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T13:49:04.601146Z", "iopub.status.busy": "2024-05-14T13:49:04.600862Z", "iopub.status.idle": "2024-05-14T13:49:29.928361Z", "shell.execute_reply": "2024-05-14T13:49:29.927569Z" }, "papermill": { "duration": 25.334722, "end_time": "2024-05-14T13:49:29.930638", "exception": false, "start_time": "2024-05-14T13:49:04.595916", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-05-14 13:49:15.791029: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-05-14 13:49:15.791134: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-05-14 13:49:16.077316: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n" ] } ], "source": [ "from stable_baselines3 import DQN\n", "from stable_baselines3.common.monitor import Monitor\n", "from stable_baselines3.common.callbacks import BaseCallback, EvalCallback, CallbackList\n", "from stable_baselines3.common.logger import Video, HParam, TensorBoardOutputFormat\n", "from stable_baselines3.common.evaluation import evaluate_policy\n", "\n", "from typing import Any, Dict\n", "\n", "import gymnasium as gym\n", "import torch as th\n", "import numpy as np\n", "\n", "# =====File names=====\n", "MODEL_FILE_NAME = \"ALE-Pacman-v5\"\n", "BUFFER_FILE_NAME = \"dqn_replay_buffer_pacman_v2-3\"\n", "POLICY_FILE_NAME = \"dqn_policy_pacman_v2-3\"\n", "\n", "# =====Model Config=====\n", "# Evaluate in 20ths\n", "EVAL_CALLBACK_FREQ = 50_000\n", "# Record in quarters (the last one won't record, will have to do manually)\n", "# If I record in quarters, but drop the frequency down to 240k instead of 250k, this might trigger a recording near the end.\n", "VIDEO_CALLBACK_FREQ = 240_000\n", "FRAMESKIP = 4\n", "NUM_TIMESTEPS = 1_000_000\n", "\n", "# =====Hyperparams=====\n", "EXPLORATION_FRACTION = 0.3\n", "# Buffer size needs to be less than about 60k in order to save it in a Kaggle instance\n", "# Increasing buffer size to 70K, should be able to store it.\n", "BUFFER_SIZE = 70_000\n", "BATCH_SIZE = 64\n", "LEARNING_STARTS = 50_000\n", "LEARNING_RATE = 0.00005\n", "GAMMA = 0.999\n", "FINAL_EPSILON = 0.05\n", "# Target Update Interval is set to 10k by default and looks like it is set to \n", "# 4 in the Nature paper. 
This is a large discrepancy and makes me wonder if it \n", "# is something different or measured differently. (The 4 in the paper is the SGD update\n", "# frequency, not the target update; there the target network is updated every 10k updates.)\n", "TARGET_UPDATE_INTERVAL = 1_000\n", "\n", "# =====Custom objects for hyperparam modification=====\n", "CUSTOM_OBJECTS = {\n", " \"exploration_fraction\": EXPLORATION_FRACTION, \n", " \"buffer_size\": BUFFER_SIZE,\n", " \"batch_size\": BATCH_SIZE,\n", " \"learning_starts\": LEARNING_STARTS,\n", " \"learning_rate\": LEARNING_RATE,\n", " \"gamma\": GAMMA,\n", " \"target_update_interval\": TARGET_UPDATE_INTERVAL,\n", " \"exploration_final_eps\": FINAL_EPSILON,\n", " \"tensorboard_log\": \"./\",\n", " \"verbose\": 1}" ] }, { "cell_type": "code", "execution_count": 3, "id": "d6137ec3", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T13:49:29.939486Z", "iopub.status.busy": "2024-05-14T13:49:29.938952Z", "iopub.status.idle": "2024-05-14T13:49:29.949719Z", "shell.execute_reply": "2024-05-14T13:49:29.948866Z" }, "jupyter": { "source_hidden": true }, "papermill": { "duration": 0.017084, "end_time": "2024-05-14T13:49:29.951510", "exception": false, "start_time": "2024-05-14T13:49:29.934426", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# VideoRecorderCallback\n", "# The VideoRecorderCallback should record a video of the agent in the evaluation environment\n", "# every render_freq timesteps, recording one episode each time. (The final recording at the end\n", "# of training won't trigger automatically and has to be done manually.)\n", "\n", "class VideoRecorderCallback(BaseCallback):\n", " def __init__(self, eval_env: gym.Env, render_freq: int, n_eval_episodes: int = 1, deterministic: bool = True):\n", " \"\"\"\n", " Records a video of an agent's trajectory traversing ``eval_env`` and logs it to TensorBoard.\n", " :param eval_env: A gym environment from which the trajectory is recorded\n", " :param render_freq: Render the agent's trajectory every render_freq call of the callback.\n", " :param n_eval_episodes: Number of episodes to render\n", " :param deterministic: Whether to use deterministic or stochastic policy\n", " \"\"\"\n", " super().__init__()\n", " self._eval_env = eval_env\n", " self._render_freq = render_freq\n", " self._n_eval_episodes = n_eval_episodes\n", " self._deterministic = deterministic\n", "\n", " def _on_step(self) -> bool:\n", " if self.n_calls % self._render_freq == 0:\n", " screens = []\n", "\n", " def grab_screens(_locals: Dict[str, Any], _globals: Dict[str, Any]) -> None:\n", " \"\"\"\n", " Renders the environment in its current state, recording the screen in the captured `screens` list\n", " :param _locals: A dictionary containing all local variables of the callback's scope\n", " :param _globals: A dictionary containing all global variables of the callback's scope\n", " \"\"\"\n", " screen = self._eval_env.render()\n", " # PyTorch uses CxHxW vs HxWxC gym (and tensorflow) image convention\n", " screens.append(screen.transpose(2, 0, 1))\n", "\n", " evaluate_policy(\n", " self.model,\n", " self._eval_env,\n", " callback=grab_screens,\n", " n_eval_episodes=self._n_eval_episodes,\n", " deterministic=self._deterministic,\n", " )\n", " self.logger.record(\n", " \"trajectory/video\",\n", " Video(th.from_numpy(np.array([screens])), fps=60),\n", " exclude=(\"stdout\", \"log\", \"json\", \"csv\"),\n", " )\n", " return True" ] }, { "cell_type": "code", "execution_count": 4, "id": "d301d5cd", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T13:49:29.960315Z", "iopub.status.busy": "2024-05-14T13:49:29.959818Z", "iopub.status.idle": "2024-05-14T13:49:29.971658Z", 
"shell.execute_reply": "2024-05-14T13:49:29.970816Z" }, "jupyter": { "source_hidden": true }, "papermill": { "duration": 0.018119, "end_time": "2024-05-14T13:49:29.973456", "exception": false, "start_time": "2024-05-14T13:49:29.955337", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# HParamCallback\n", "# This logs the specified hyperparameters at the start of training and maps the logged metrics\n", "# to the appropriate run.\n", "class HParamCallback(BaseCallback):\n", " \"\"\"\n", " Saves the hyperparameters and metrics at the start of the training, and logs them to TensorBoard.\n", " \"\"\" \n", " def __init__(self):\n", " super().__init__()\n", " \n", "\n", " def _on_training_start(self) -> None:\n", " \n", " hparam_dict = {\n", " \"algorithm\": self.model.__class__.__name__,\n", " \"policy\": self.model.policy.__class__.__name__,\n", " \"environment\": self.model.env.__class__.__name__,\n", " \"buffer_size\": self.model.buffer_size,\n", " \"batch_size\": self.model.batch_size,\n", " \"tau\": self.model.tau,\n", " \"gradient_steps\": self.model.gradient_steps,\n", " \"target_update_interval\": self.model.target_update_interval,\n", " \"exploration_fraction\": self.model.exploration_fraction,\n", " \"exploration_initial_eps\": self.model.exploration_initial_eps,\n", " \"exploration_final_eps\": self.model.exploration_final_eps,\n", " \"max_grad_norm\": self.model.max_grad_norm,\n", " \"tensorboard_log\": self.model.tensorboard_log,\n", " \"seed\": self.model.seed, \n", " \"learning rate\": self.model.learning_rate,\n", " \"gamma\": self.model.gamma, \n", " }\n", " # define the metrics that will appear in the `HPARAMS` TensorBoard tab by referencing their tag\n", " # TensorBoard will find & display metrics from the `SCALARS` tab\n", " metric_dict = {\n", " \"eval/mean_ep_length\": 0,\n", " \"eval/mean_reward\": 0,\n", " \"rollout/ep_len_mean\": 0,\n", " \"rollout/ep_rew_mean\": 0,\n", " \"rollout/exploration_rate\": 0,\n", " \"time/_episode_num\": 0,\n", " \"time/fps\": 0,\n", " \"time/total_timesteps\": 0,\n", " \"train/learning_rate\": 0.0,\n", " \"train/loss\": 0.0,\n", " \"train/n_updates\": 0.0,\n", " \"locals/rewards\": 0.0,\n", " \"locals/infos_0_lives\": 0.0,\n", " \"locals/num_collected_steps\": 0.0,\n", " \"locals/num_collected_episodes\": 0.0\n", " }\n", " \n", " self.logger.record(\n", " \"hparams\",\n", " HParam(hparam_dict, metric_dict),\n", " exclude=(\"stdout\", \"log\", \"json\", \"csv\"),\n", " )\n", " \n", " def _on_step(self) -> bool:\n", " return True" ] }, { "cell_type": "code", "execution_count": 5, "id": "ef8d38ae", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T13:49:29.982282Z", "iopub.status.busy": "2024-05-14T13:49:29.981655Z", "iopub.status.idle": "2024-05-14T13:49:29.997110Z", "shell.execute_reply": "2024-05-14T13:49:29.996128Z" }, "jupyter": { "source_hidden": true }, "papermill": { "duration": 0.02195, "end_time": "2024-05-14T13:49:29.999257", "exception": false, "start_time": "2024-05-14T13:49:29.977307", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# PlotTensorboardValuesCallback\n", "# This callback should log values to TensorBoard on every step. 
\n", "# self.logger.record() plots each value as a new scalar point in TensorBoard.\n", "\n", "class PlotTensorboardValuesCallback(BaseCallback):\n", " \"\"\"\n", " Custom callback for plotting additional values in TensorBoard.\n", " \"\"\"\n", " def __init__(self, eval_env: gym.Env, train_env: gym.Env, model: DQN, verbose=0):\n", " super().__init__(verbose)\n", " self._eval_env = eval_env\n", " self._train_env = train_env\n", " self._model = model\n", "\n", " def _on_training_start(self) -> None:\n", " output_formats = self.logger.output_formats\n", " # Save a reference to the TensorBoard formatter object\n", " # note: if no TensorBoard formatter is found, next() raises StopIteration, which is caught below.\n", " try:\n", " self.tb_formatter = next(formatter for formatter in output_formats if isinstance(formatter, TensorBoardOutputFormat))\n", " except:\n", " print(\"Exception thrown in tb_formatter initialization.\") \n", " \n", " self.tb_formatter.writer.add_text(\"metadata/eval_env\", str(self._eval_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"metadata/train_env\", str(self._train_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net\", str(self._model.q_net), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net_target\", str(self._model.q_net_target), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", "\n", " def _on_step(self) -> bool:\n", " self.logger.record(\"time/_episode_num\", self.model._episode_num, exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"train/n_updates\", self.model._n_updates, exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/rewards\", self.locals[\"rewards\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/infos_0_lives\", self.locals[\"infos\"][0][\"lives\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/num_collected_steps\", self.locals[\"num_collected_steps\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " self.logger.record(\"locals/num_collected_episodes\", self.locals[\"num_collected_episodes\"], exclude=(\"stdout\", \"log\", \"json\", \"csv\"))\n", " \n", " return True\n", " \n", " def _on_training_end(self) -> None:\n", " self.tb_formatter.writer.add_text(\"metadata/eval_env\", str(self._eval_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"metadata/train_env\", str(self._train_env.metadata), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net\", str(self._model.q_net), self.num_timesteps)\n", " self.tb_formatter.writer.flush()\n", " self.tb_formatter.writer.add_text(\"model/q_net_target\", str(self._model.q_net_target), self.num_timesteps)\n", " self.tb_formatter.writer.flush()" ] }, { "cell_type": "code", "execution_count": 6, "id": "b78c42cf", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T13:49:30.007706Z", "iopub.status.busy": "2024-05-14T13:49:30.007443Z", "iopub.status.idle": "2024-05-14T13:51:17.832432Z", "shell.execute_reply": "2024-05-14T13:51:17.831457Z" }, "papermill": { "duration": 107.832135, "end_time": "2024-05-14T13:51:17.835091", "exception": false, "start_time": "2024-05-14T13:49:30.002956", "status": "completed" }, "tags": [] }, "outputs": [ { 
"name": "stderr", "output_type": "stream", "text": [ "A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)\n", "[Powered by Stella]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Wrapping the env with a `Monitor` wrapper\n", "Wrapping the env in a DummyVecEnv.\n", "Wrapping the env in a VecTransposeImage.\n" ] } ], "source": [ "# make the training and evaluation environments\n", "eval_env = Monitor(gym.make(\"ALE/Pacman-v5\", render_mode=\"rgb_array\", frameskip=FRAMESKIP))\n", "train_env = gym.make(\"ALE/Pacman-v5\", render_mode=\"rgb_array\", frameskip=FRAMESKIP)\n", "\n", "# Make the model with specified hyperparams\n", "# load the model\n", "# load the buffer\n", "# The loaded model still needs the hyperparameters to be passed to it, and the replay buffer\n", "model = DQN.load(\"/kaggle/input/dqn-pacmanv5-run2v3/ALE-Pacman-v5.zip\", \n", " env=train_env, \n", " custom_objects=CUSTOM_OBJECTS)\n", "model.load_replay_buffer(\"/kaggle/input/dqn-pacmanv5-run2v3/dqn_replay_buffer_pacman_v2-2\")" ] }, { "cell_type": "code", "execution_count": 7, "id": "a17ac5b8", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T13:51:17.845549Z", "iopub.status.busy": "2024-05-14T13:51:17.844960Z", "iopub.status.idle": "2024-05-14T13:51:17.851540Z", "shell.execute_reply": "2024-05-14T13:51:17.850629Z" }, "papermill": { "duration": 0.013857, "end_time": "2024-05-14T13:51:17.853615", "exception": false, "start_time": "2024-05-14T13:51:17.839758", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Define the callbacks and put them in a list\n", "eval_callback = EvalCallback(\n", " eval_env,\n", " best_model_save_path=\"./best_model/\",\n", " log_path=\"./evals/\",\n", " eval_freq=EVAL_CALLBACK_FREQ,\n", " n_eval_episodes=10,\n", " deterministic=True,\n", " render=False)\n", "\n", "tbplot_callback = PlotTensorboardValuesCallback(eval_env=eval_env, train_env=train_env, model=model)\n", "video_callback = VideoRecorderCallback(eval_env, render_freq=VIDEO_CALLBACK_FREQ)\n", "hparam_callback = HParamCallback()\n", "\n", "callback_list = CallbackList([hparam_callback, eval_callback, video_callback, tbplot_callback])" ] }, { "cell_type": "code", "execution_count": 8, "id": "7c00b9e7", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T13:51:17.863121Z", "iopub.status.busy": "2024-05-14T13:51:17.862817Z", "iopub.status.idle": "2024-05-14T16:24:04.099984Z", "shell.execute_reply": "2024-05-14T16:24:04.099008Z" }, "papermill": { "duration": 9166.244516, "end_time": "2024-05-14T16:24:04.102181", "exception": false, "start_time": "2024-05-14T13:51:17.857665", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Logging to ././tb/_0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/stable_baselines3/common/callbacks.py:403: UserWarning: Training and eval env are not of the same type != \n", " warnings.warn(\"Training and eval env are not of the same type\" f\"{self.training_env} != {self.eval_env}\")\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 211 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4528 |\n", "| fps | 106 |\n", "| time_elapsed | 42 |\n", "| total_timesteps | 4504496 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0746 |\n", "----------------------------------\n", 
"----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 217 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4532 |\n", "| fps | 107 |\n", "| time_elapsed | 84 |\n", "| total_timesteps | 4509070 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.766 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.1e+03 |\n", "| ep_rew_mean | 222 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4536 |\n", "| fps | 107 |\n", "| time_elapsed | 123 |\n", "| total_timesteps | 4513284 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.174 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 224 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4540 |\n", "| fps | 108 |\n", "| time_elapsed | 162 |\n", "| total_timesteps | 4517533 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.134 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 229 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4544 |\n", "| fps | 108 |\n", "| time_elapsed | 207 |\n", "| total_timesteps | 4522513 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.15 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 229 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4548 |\n", "| fps | 108 |\n", "| time_elapsed | 247 |\n", "| total_timesteps | 4526789 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.259 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.08e+03 |\n", "| ep_rew_mean | 227 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4552 |\n", "| fps | 108 |\n", "| time_elapsed | 284 |\n", "| total_timesteps | 4530827 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.275 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 230 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4556 |\n", "| fps | 108 |\n", "| time_elapsed | 328 |\n", "| total_timesteps | 4535637 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.128 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 233 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4560 |\n", "| fps | 108 |\n", "| time_elapsed | 375 |\n", "| total_timesteps | 4540736 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0537 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 230 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4564 |\n", "| fps | 108 |\n", "| time_elapsed | 420 |\n", "| total_timesteps | 4545658 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.131 |\n", "----------------------------------\n", "Eval num_timesteps=4550000, episode_reward=153.00 +/- 49.13\n", "Episode 
length: 1094.50 +/- 243.25\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.09e+03 |\n", "| mean_reward | 153 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 4550000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.278 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 231 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4568 |\n", "| fps | 102 |\n", "| time_elapsed | 489 |\n", "| total_timesteps | 4550224 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.124 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 232 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4572 |\n", "| fps | 102 |\n", "| time_elapsed | 533 |\n", "| total_timesteps | 4554956 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.109 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 231 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4576 |\n", "| fps | 103 |\n", "| time_elapsed | 574 |\n", "| total_timesteps | 4559341 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0731 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.09e+03 |\n", "| ep_rew_mean | 227 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4580 |\n", "| fps | 103 |\n", "| time_elapsed | 617 |\n", "| total_timesteps | 4563981 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.127 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.11e+03 |\n", "| ep_rew_mean | 229 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4584 |\n", "| fps | 104 |\n", "| time_elapsed | 660 |\n", "| total_timesteps | 4568679 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0933 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 234 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4588 |\n", "| fps | 104 |\n", "| time_elapsed | 703 |\n", "| total_timesteps | 4573445 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.123 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 235 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4592 |\n", "| fps | 104 |\n", "| time_elapsed | 748 |\n", "| total_timesteps | 4578343 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0617 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 232 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4596 |\n", "| fps | 104 |\n", "| time_elapsed | 784 |\n", "| total_timesteps | 4582239 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0579 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", 
"| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 225 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4600 |\n", "| fps | 105 |\n", "| time_elapsed | 831 |\n", "| total_timesteps | 4587341 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0553 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 221 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4604 |\n", "| fps | 105 |\n", "| time_elapsed | 871 |\n", "| total_timesteps | 4591741 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.057 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 221 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4608 |\n", "| fps | 105 |\n", "| time_elapsed | 916 |\n", "| total_timesteps | 4596563 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.125 |\n", "----------------------------------\n", "Eval num_timesteps=4600000, episode_reward=258.70 +/- 59.54\n", "Episode length: 1226.20 +/- 145.82\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.23e+03 |\n", "| mean_reward | 259 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 4600000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0855 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 220 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4612 |\n", "| fps | 102 |\n", "| time_elapsed | 993 |\n", "| total_timesteps | 4601605 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.226 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 222 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4616 |\n", "| fps | 102 |\n", "| time_elapsed | 1033 |\n", "| total_timesteps | 4605981 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.267 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 225 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4620 |\n", "| fps | 102 |\n", "| time_elapsed | 1075 |\n", "| total_timesteps | 4610579 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.107 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 224 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4624 |\n", "| fps | 102 |\n", "| time_elapsed | 1110 |\n", "| total_timesteps | 4614317 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.21 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 218 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4628 |\n", "| fps | 103 |\n", "| time_elapsed | 1143 |\n", "| total_timesteps | 4617973 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.125 |\n", "----------------------------------\n", "----------------------------------\n", "| 
rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 210 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4632 |\n", "| fps | 103 |\n", "| time_elapsed | 1184 |\n", "| total_timesteps | 4622447 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.12 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 211 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4636 |\n", "| fps | 103 |\n", "| time_elapsed | 1236 |\n", "| total_timesteps | 4628093 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0845 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 209 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4640 |\n", "| fps | 103 |\n", "| time_elapsed | 1282 |\n", "| total_timesteps | 4633173 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.077 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 209 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4644 |\n", "| fps | 103 |\n", "| time_elapsed | 1325 |\n", "| total_timesteps | 4637791 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.185 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 205 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4648 |\n", "| fps | 104 |\n", "| time_elapsed | 1363 |\n", "| total_timesteps | 4641938 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0503 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 208 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4652 |\n", "| fps | 104 |\n", "| time_elapsed | 1406 |\n", "| total_timesteps | 4646690 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.206 |\n", "----------------------------------\n", "Eval num_timesteps=4650000, episode_reward=147.90 +/- 24.58\n", "Episode length: 1182.20 +/- 332.10\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.18e+03 |\n", "| mean_reward | 148 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 4650000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.202 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 211 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4656 |\n", "| fps | 102 |\n", "| time_elapsed | 1484 |\n", "| total_timesteps | 4651994 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.088 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 206 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4660 |\n", "| fps | 102 |\n", "| time_elapsed | 1524 |\n", "| total_timesteps | 4656295 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0669 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ 
| |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 205 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4664 |\n", "| fps | 102 |\n", "| time_elapsed | 1569 |\n", "| total_timesteps | 4661231 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0614 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 207 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4668 |\n", "| fps | 102 |\n", "| time_elapsed | 1608 |\n", "| total_timesteps | 4665439 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0794 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 206 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4672 |\n", "| fps | 103 |\n", "| time_elapsed | 1654 |\n", "| total_timesteps | 4670458 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.109 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 205 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4676 |\n", "| fps | 103 |\n", "| time_elapsed | 1695 |\n", "| total_timesteps | 4674908 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.475 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 208 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4680 |\n", "| fps | 103 |\n", "| time_elapsed | 1732 |\n", "| total_timesteps | 4678992 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0491 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 209 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4684 |\n", "| fps | 103 |\n", "| time_elapsed | 1774 |\n", "| total_timesteps | 4683456 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.383 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 207 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4688 |\n", "| fps | 103 |\n", "| time_elapsed | 1807 |\n", "| total_timesteps | 4687116 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0789 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 206 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4692 |\n", "| fps | 103 |\n", "| time_elapsed | 1848 |\n", "| total_timesteps | 4691496 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.112 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 211 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4696 |\n", "| fps | 103 |\n", "| time_elapsed | 1888 |\n", "| total_timesteps | 4695910 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0691 |\n", "----------------------------------\n", "Eval num_timesteps=4700000, episode_reward=184.60 +/- 74.56\n", "Episode length: 1105.60 +/- 41.83\n", 
"----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.11e+03 |\n", "| mean_reward | 185 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 4700000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0613 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 215 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4700 |\n", "| fps | 102 |\n", "| time_elapsed | 1961 |\n", "| total_timesteps | 4700876 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.117 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 215 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4704 |\n", "| fps | 102 |\n", "| time_elapsed | 1995 |\n", "| total_timesteps | 4704610 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0682 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 221 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4708 |\n", "| fps | 102 |\n", "| time_elapsed | 2041 |\n", "| total_timesteps | 4709502 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0989 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 224 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4712 |\n", "| fps | 102 |\n", "| time_elapsed | 2081 |\n", "| total_timesteps | 4713904 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.48 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 225 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4716 |\n", "| fps | 102 |\n", "| time_elapsed | 2122 |\n", "| total_timesteps | 4718268 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.127 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 225 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4720 |\n", "| fps | 102 |\n", "| time_elapsed | 2168 |\n", "| total_timesteps | 4723324 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.133 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 232 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4724 |\n", "| fps | 103 |\n", "| time_elapsed | 2214 |\n", "| total_timesteps | 4728258 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.703 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 235 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4728 |\n", "| fps | 103 |\n", "| time_elapsed | 2255 |\n", "| total_timesteps | 4732669 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0898 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 239 
|\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4732 |\n", "| fps | 103 |\n", "| time_elapsed | 2299 |\n", "| total_timesteps | 4737404 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.224 |\n", "----------------------------------\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.10/site-packages/gymnasium/utils/passive_env_checker.py:335: UserWarning: \u001b[33mWARN: No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps.\u001b[0m\n", " logger.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 230 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4736 |\n", "| fps | 103 |\n", "| time_elapsed | 2337 |\n", "| total_timesteps | 4741388 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.108 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 228 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4740 |\n", "| fps | 103 |\n", "| time_elapsed | 2383 |\n", "| total_timesteps | 4745956 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "Eval num_timesteps=4750000, episode_reward=280.90 +/- 126.71\n", "Episode length: 1161.20 +/- 134.56\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.16e+03 |\n", "| mean_reward | 281 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 4750000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0522 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 225 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4744 |\n", "| fps | 102 |\n", "| time_elapsed | 2451 |\n", "| total_timesteps | 4750482 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.443 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 226 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4748 |\n", "| fps | 102 |\n", "| time_elapsed | 2492 |\n", "| total_timesteps | 4755340 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.723 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 222 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4752 |\n", "| fps | 102 |\n", "| time_elapsed | 2527 |\n", "| total_timesteps | 4759598 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.236 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 218 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4756 |\n", "| fps | 102 |\n", "| time_elapsed | 2566 |\n", "| total_timesteps | 4764294 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.181 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 
1.13e+03 |\n", "| ep_rew_mean | 222 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4760 |\n", "| fps | 103 |\n", "| time_elapsed | 2605 |\n", "| total_timesteps | 4769042 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0983 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 221 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4764 |\n", "| fps | 103 |\n", "| time_elapsed | 2649 |\n", "| total_timesteps | 4774258 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.441 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 217 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4768 |\n", "| fps | 103 |\n", "| time_elapsed | 2686 |\n", "| total_timesteps | 4778812 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.748 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 218 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4772 |\n", "| fps | 103 |\n", "| time_elapsed | 2722 |\n", "| total_timesteps | 4783122 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.114 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 221 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4776 |\n", "| fps | 104 |\n", "| time_elapsed | 2763 |\n", "| total_timesteps | 4788078 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.129 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 225 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4780 |\n", "| fps | 104 |\n", "| time_elapsed | 2803 |\n", "| total_timesteps | 4792836 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.169 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 230 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4784 |\n", "| fps | 104 |\n", "| time_elapsed | 2837 |\n", "| total_timesteps | 4796970 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.111 |\n", "----------------------------------\n", "Eval num_timesteps=4800000, episode_reward=165.10 +/- 32.75\n", "Episode length: 1192.80 +/- 112.32\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.19e+03 |\n", "| mean_reward | 165 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 4800000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.515 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 231 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4788 |\n", "| fps | 103 |\n", "| time_elapsed | 2896 |\n", "| total_timesteps | 4800638 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.111 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 
|\n", "| ep_rew_mean | 235 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4792 |\n", "| fps | 103 |\n", "| time_elapsed | 2935 |\n", "| total_timesteps | 4805230 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.183 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 232 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4796 |\n", "| fps | 104 |\n", "| time_elapsed | 2971 |\n", "| total_timesteps | 4809624 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.42 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 232 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4800 |\n", "| fps | 104 |\n", "| time_elapsed | 3014 |\n", "| total_timesteps | 4814848 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.314 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 235 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4804 |\n", "| fps | 104 |\n", "| time_elapsed | 3053 |\n", "| total_timesteps | 4819562 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.11 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 227 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4808 |\n", "| fps | 104 |\n", "| time_elapsed | 3084 |\n", "| total_timesteps | 4823296 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0623 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 230 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4812 |\n", "| fps | 105 |\n", "| time_elapsed | 3123 |\n", "| total_timesteps | 4827934 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.3 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 230 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4816 |\n", "| fps | 105 |\n", "| time_elapsed | 3166 |\n", "| total_timesteps | 4833192 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0986 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 231 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4820 |\n", "| fps | 105 |\n", "| time_elapsed | 3204 |\n", "| total_timesteps | 4837714 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0726 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 228 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4824 |\n", "| fps | 105 |\n", "| time_elapsed | 3242 |\n", "| total_timesteps | 4842298 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.172 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 234 |\n", "| exploration_rate | 0.05 
|\n", "| time/ | |\n", "| episodes | 4828 |\n", "| fps | 105 |\n", "| time_elapsed | 3280 |\n", "| total_timesteps | 4846907 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.461 |\n", "----------------------------------\n", "Eval num_timesteps=4850000, episode_reward=349.00 +/- 126.98\n", "Episode length: 1160.80 +/- 119.67\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.16e+03 |\n", "| mean_reward | 349 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 4850000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0891 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 241 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4832 |\n", "| fps | 104 |\n", "| time_elapsed | 3352 |\n", "| total_timesteps | 4851789 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.759 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 254 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4836 |\n", "| fps | 105 |\n", "| time_elapsed | 3395 |\n", "| total_timesteps | 4856955 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.256 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 258 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4840 |\n", "| fps | 105 |\n", "| time_elapsed | 3437 |\n", "| total_timesteps | 4862070 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.656 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 259 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4844 |\n", "| fps | 105 |\n", "| time_elapsed | 3480 |\n", "| total_timesteps | 4867316 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.22 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 263 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4848 |\n", "| fps | 105 |\n", "| time_elapsed | 3516 |\n", "| total_timesteps | 4871649 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.151 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 269 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4852 |\n", "| fps | 105 |\n", "| time_elapsed | 3557 |\n", "| total_timesteps | 4876577 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0918 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 272 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4856 |\n", "| fps | 106 |\n", "| time_elapsed | 3592 |\n", "| total_timesteps | 4880847 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0997 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 273 |\n", "| 
exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4860 |\n", "| fps | 106 |\n", "| time_elapsed | 3627 |\n", "| total_timesteps | 4885110 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0592 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 279 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4864 |\n", "| fps | 106 |\n", "| time_elapsed | 3667 |\n", "| total_timesteps | 4889884 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.106 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 278 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4868 |\n", "| fps | 106 |\n", "| time_elapsed | 3693 |\n", "| total_timesteps | 4893048 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.872 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 281 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4872 |\n", "| fps | 106 |\n", "| time_elapsed | 3734 |\n", "| total_timesteps | 4897938 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.28 |\n", "----------------------------------\n", "Eval num_timesteps=4900000, episode_reward=410.60 +/- 149.05\n", "Episode length: 3731.90 +/- 7755.65\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 3.73e+03 |\n", "| mean_reward | 411 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 4900000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.398 |\n", "----------------------------------\n", "New best mean reward!\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 276 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4876 |\n", "| fps | 104 |\n", "| time_elapsed | 3858 |\n", "| total_timesteps | 4902026 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.11 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 275 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4880 |\n", "| fps | 104 |\n", "| time_elapsed | 3891 |\n", "| total_timesteps | 4906048 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.613 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 274 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4884 |\n", "| fps | 104 |\n", "| time_elapsed | 3930 |\n", "| total_timesteps | 4910750 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0821 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 282 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4888 |\n", "| fps | 104 |\n", "| time_elapsed | 3965 |\n", "| total_timesteps | 4915014 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.147 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| 
ep_rew_mean | 286 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4892 |\n", "| fps | 104 |\n", "| time_elapsed | 4004 |\n", "| total_timesteps | 4919698 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.608 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4896 |\n", "| fps | 104 |\n", "| time_elapsed | 4042 |\n", "| total_timesteps | 4924326 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.196 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 290 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4900 |\n", "| fps | 105 |\n", "| time_elapsed | 4078 |\n", "| total_timesteps | 4928636 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.198 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 297 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4904 |\n", "| fps | 105 |\n", "| time_elapsed | 4116 |\n", "| total_timesteps | 4933262 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.105 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 301 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4908 |\n", "| fps | 105 |\n", "| time_elapsed | 4156 |\n", "| total_timesteps | 4938088 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.65 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 296 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4912 |\n", "| fps | 105 |\n", "| time_elapsed | 4196 |\n", "| total_timesteps | 4942904 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0664 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 289 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4916 |\n", "| fps | 105 |\n", "| time_elapsed | 4231 |\n", "| total_timesteps | 4947092 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.107 |\n", "----------------------------------\n", "Eval num_timesteps=4950000, episode_reward=142.50 +/- 63.77\n", "Episode length: 1094.80 +/- 177.66\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.09e+03 |\n", "| mean_reward | 142 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 4950000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.109 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 289 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4920 |\n", "| fps | 105 |\n", "| time_elapsed | 4294 |\n", "| total_timesteps | 4951572 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0572 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| 
ep_rew_mean | 290 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4924 |\n", "| fps | 105 |\n", "| time_elapsed | 4332 |\n", "| total_timesteps | 4956154 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.235 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4928 |\n", "| fps | 105 |\n", "| time_elapsed | 4375 |\n", "| total_timesteps | 4961306 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0616 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 285 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4932 |\n", "| fps | 105 |\n", "| time_elapsed | 4415 |\n", "| total_timesteps | 4966074 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.251 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 271 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4936 |\n", "| fps | 105 |\n", "| time_elapsed | 4448 |\n", "| total_timesteps | 4970042 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.197 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 270 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4940 |\n", "| fps | 105 |\n", "| time_elapsed | 4491 |\n", "| total_timesteps | 4975240 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.159 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 270 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4944 |\n", "| fps | 105 |\n", "| time_elapsed | 4537 |\n", "| total_timesteps | 4980424 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.121 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 263 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4948 |\n", "| fps | 105 |\n", "| time_elapsed | 4575 |\n", "| total_timesteps | 4984212 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.169 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 265 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4952 |\n", "| fps | 105 |\n", "| time_elapsed | 4614 |\n", "| total_timesteps | 4988938 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.114 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 271 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4956 |\n", "| fps | 106 |\n", "| time_elapsed | 4656 |\n", "| total_timesteps | 4994060 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0682 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 275 |\n", "| exploration_rate | 0.05 |\n", "| 
time/ | |\n", "| episodes | 4960 |\n", "| fps | 106 |\n", "| time_elapsed | 4694 |\n", "| total_timesteps | 4998590 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.145 |\n", "----------------------------------\n", "Eval num_timesteps=5000000, episode_reward=208.10 +/- 101.73\n", "Episode length: 1131.30 +/- 211.53\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.13e+03 |\n", "| mean_reward | 208 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5000000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0833 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 266 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4964 |\n", "| fps | 105 |\n", "| time_elapsed | 4756 |\n", "| total_timesteps | 5002796 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.112 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 271 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4968 |\n", "| fps | 105 |\n", "| time_elapsed | 4787 |\n", "| total_timesteps | 5006474 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0682 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 265 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4972 |\n", "| fps | 105 |\n", "| time_elapsed | 4826 |\n", "| total_timesteps | 5011206 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.141 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 268 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4976 |\n", "| fps | 106 |\n", "| time_elapsed | 4862 |\n", "| total_timesteps | 5015608 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.3 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 265 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4980 |\n", "| fps | 106 |\n", "| time_elapsed | 4901 |\n", "| total_timesteps | 5020362 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.119 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 263 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4984 |\n", "| fps | 106 |\n", "| time_elapsed | 4941 |\n", "| total_timesteps | 5025142 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.352 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 254 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4988 |\n", "| fps | 106 |\n", "| time_elapsed | 4977 |\n", "| total_timesteps | 5029468 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0941 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 245 |\n", "| exploration_rate | 0.05 |\n", "| time/ | 
|\n", "| episodes | 4992 |\n", "| fps | 106 |\n", "| time_elapsed | 5008 |\n", "| total_timesteps | 5033292 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.105 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 240 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 4996 |\n", "| fps | 106 |\n", "| time_elapsed | 5048 |\n", "| total_timesteps | 5038052 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.086 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 237 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5000 |\n", "| fps | 106 |\n", "| time_elapsed | 5082 |\n", "| total_timesteps | 5042210 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.12 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 232 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5004 |\n", "| fps | 106 |\n", "| time_elapsed | 5120 |\n", "| total_timesteps | 5046804 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.172 |\n", "----------------------------------\n", "Eval num_timesteps=5050000, episode_reward=226.30 +/- 99.10\n", "Episode length: 1158.80 +/- 184.13\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.16e+03 |\n", "| mean_reward | 226 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5050000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.194 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 233 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5008 |\n", "| fps | 106 |\n", "| time_elapsed | 5189 |\n", "| total_timesteps | 5051776 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.166 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 229 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5012 |\n", "| fps | 106 |\n", "| time_elapsed | 5219 |\n", "| total_timesteps | 5055328 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0759 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.12e+03 |\n", "| ep_rew_mean | 228 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5016 |\n", "| fps | 106 |\n", "| time_elapsed | 5252 |\n", "| total_timesteps | 5059400 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.13 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 225 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5020 |\n", "| fps | 106 |\n", "| time_elapsed | 5299 |\n", "| total_timesteps | 5065010 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.15 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 225 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| 
episodes | 5024 |\n", "| fps | 106 |\n", "| time_elapsed | 5339 |\n", "| total_timesteps | 5069830 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0841 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 224 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5028 |\n", "| fps | 106 |\n", "| time_elapsed | 5377 |\n", "| total_timesteps | 5074500 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.385 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 227 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5032 |\n", "| fps | 106 |\n", "| time_elapsed | 5415 |\n", "| total_timesteps | 5079066 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0862 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 230 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5036 |\n", "| fps | 107 |\n", "| time_elapsed | 5456 |\n", "| total_timesteps | 5084026 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.126 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 229 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5040 |\n", "| fps | 107 |\n", "| time_elapsed | 5491 |\n", "| total_timesteps | 5088338 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.104 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 233 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5044 |\n", "| fps | 107 |\n", "| time_elapsed | 5535 |\n", "| total_timesteps | 5093568 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0806 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 235 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5048 |\n", "| fps | 107 |\n", "| time_elapsed | 5573 |\n", "| total_timesteps | 5098234 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.34 |\n", "----------------------------------\n", "Eval num_timesteps=5100000, episode_reward=336.30 +/- 88.28\n", "Episode length: 1227.20 +/- 103.72\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.23e+03 |\n", "| mean_reward | 336 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5100000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.126 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 229 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5052 |\n", "| fps | 106 |\n", "| time_elapsed | 5641 |\n", "| total_timesteps | 5102858 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0794 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 225 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| 
episodes | 5056 |\n", "| fps | 106 |\n", "| time_elapsed | 5678 |\n", "| total_timesteps | 5107364 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0915 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.13e+03 |\n", "| ep_rew_mean | 223 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5060 |\n", "| fps | 107 |\n", "| time_elapsed | 5714 |\n", "| total_timesteps | 5111730 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0796 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.14e+03 |\n", "| ep_rew_mean | 236 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5064 |\n", "| fps | 107 |\n", "| time_elapsed | 5754 |\n", "| total_timesteps | 5116482 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.197 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 237 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5068 |\n", "| fps | 107 |\n", "| time_elapsed | 5798 |\n", "| total_timesteps | 5121802 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0614 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 238 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5072 |\n", "| fps | 107 |\n", "| time_elapsed | 5837 |\n", "| total_timesteps | 5126522 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.1 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 233 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5076 |\n", "| fps | 107 |\n", "| time_elapsed | 5870 |\n", "| total_timesteps | 5130630 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.099 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 238 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5080 |\n", "| fps | 107 |\n", "| time_elapsed | 5915 |\n", "| total_timesteps | 5135982 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.101 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 238 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5084 |\n", "| fps | 107 |\n", "| time_elapsed | 5952 |\n", "| total_timesteps | 5140454 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.187 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.15e+03 |\n", "| ep_rew_mean | 242 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5088 |\n", "| fps | 107 |\n", "| time_elapsed | 5989 |\n", "| total_timesteps | 5144900 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.111 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 243 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5092 |\n", "| fps | 107 |\n", "| time_elapsed | 
6026 |\n", "| total_timesteps | 5149462 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.414 |\n", "----------------------------------\n", "Eval num_timesteps=5150000, episode_reward=269.20 +/- 98.76\n", "Episode length: 1092.20 +/- 97.39\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.09e+03 |\n", "| mean_reward | 269 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5150000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.752 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 244 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5096 |\n", "| fps | 107 |\n", "| time_elapsed | 6089 |\n", "| total_timesteps | 5153838 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.729 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 249 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5100 |\n", "| fps | 107 |\n", "| time_elapsed | 6133 |\n", "| total_timesteps | 5159182 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0717 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 248 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5104 |\n", "| fps | 107 |\n", "| time_elapsed | 6170 |\n", "| total_timesteps | 5163654 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.109 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 249 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5108 |\n", "| fps | 107 |\n", "| time_elapsed | 6208 |\n", "| total_timesteps | 5168260 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0965 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 247 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5112 |\n", "| fps | 107 |\n", "| time_elapsed | 6242 |\n", "| total_timesteps | 5172356 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.167 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 253 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5116 |\n", "| fps | 107 |\n", "| time_elapsed | 6289 |\n", "| total_timesteps | 5178032 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0593 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 261 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5120 |\n", "| fps | 107 |\n", "| time_elapsed | 6332 |\n", "| total_timesteps | 5183222 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0632 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 257 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5124 |\n", "| fps | 107 |\n", "| time_elapsed | 6367 
|\n", "| total_timesteps | 5187410 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.11 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 254 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5128 |\n", "| fps | 108 |\n", "| time_elapsed | 6406 |\n", "| total_timesteps | 5192196 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0463 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 253 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5132 |\n", "| fps | 108 |\n", "| time_elapsed | 6445 |\n", "| total_timesteps | 5196972 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.637 |\n", "----------------------------------\n", "Eval num_timesteps=5200000, episode_reward=261.00 +/- 107.33\n", "Episode length: 1171.20 +/- 110.57\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.17e+03 |\n", "| mean_reward | 261 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5200000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.12 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 258 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5136 |\n", "| fps | 107 |\n", "| time_elapsed | 6507 |\n", "| total_timesteps | 5201019 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.135 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 259 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5140 |\n", "| fps | 107 |\n", "| time_elapsed | 6544 |\n", "| total_timesteps | 5205477 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.223 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 252 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5144 |\n", "| fps | 107 |\n", "| time_elapsed | 6582 |\n", "| total_timesteps | 5210083 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0886 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 256 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5148 |\n", "| fps | 107 |\n", "| time_elapsed | 6622 |\n", "| total_timesteps | 5214991 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.246 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 258 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5152 |\n", "| fps | 108 |\n", "| time_elapsed | 6659 |\n", "| total_timesteps | 5219453 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.196 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.16e+03 |\n", "| ep_rew_mean | 259 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5156 |\n", "| fps | 108 |\n", "| time_elapsed | 6698 |\n", "| 
total_timesteps | 5223830 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.061 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 258 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5160 |\n", "| fps | 108 |\n", "| time_elapsed | 6748 |\n", "| total_timesteps | 5229232 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0769 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 249 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5164 |\n", "| fps | 108 |\n", "| time_elapsed | 6788 |\n", "| total_timesteps | 5234078 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0448 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 250 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5168 |\n", "| fps | 108 |\n", "| time_elapsed | 6830 |\n", "| total_timesteps | 5239142 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.15 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.17e+03 |\n", "| ep_rew_mean | 250 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5172 |\n", "| fps | 108 |\n", "| time_elapsed | 6870 |\n", "| total_timesteps | 5243938 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.129 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 261 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5176 |\n", "| fps | 108 |\n", "| time_elapsed | 6908 |\n", "| total_timesteps | 5248634 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.102 |\n", "----------------------------------\n", "Eval num_timesteps=5250000, episode_reward=278.80 +/- 82.30\n", "Episode length: 1327.20 +/- 175.34\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.33e+03 |\n", "| mean_reward | 279 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5250000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0251 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.18e+03 |\n", "| ep_rew_mean | 259 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5180 |\n", "| fps | 107 |\n", "| time_elapsed | 6983 |\n", "| total_timesteps | 5253766 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0498 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.19e+03 |\n", "| ep_rew_mean | 263 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5184 |\n", "| fps | 108 |\n", "| time_elapsed | 7031 |\n", "| total_timesteps | 5259588 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0923 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 264 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5188 |\n", "| fps | 108 |\n", "| time_elapsed | 7072 |\n", "| 
total_timesteps | 5264540 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.082 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 270 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5192 |\n", "| fps | 108 |\n", "| time_elapsed | 7114 |\n", "| total_timesteps | 5269542 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0698 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 272 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5196 |\n", "| fps | 108 |\n", "| time_elapsed | 7160 |\n", "| total_timesteps | 5275196 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0705 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.2e+03 |\n", "| ep_rew_mean | 270 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5200 |\n", "| fps | 108 |\n", "| time_elapsed | 7194 |\n", "| total_timesteps | 5279246 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.603 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.21e+03 |\n", "| ep_rew_mean | 272 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5204 |\n", "| fps | 108 |\n", "| time_elapsed | 7237 |\n", "| total_timesteps | 5284449 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0686 |\n", "----------------------------------\n", "Eval num_timesteps=5300000, episode_reward=343.20 +/- 76.80\n", "Episode length: 1169.20 +/- 207.18\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.17e+03 |\n", "| mean_reward | 343 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5300000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.053 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean | 269 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5208 |\n", "| fps | 108 |\n", "| time_elapsed | 7513 |\n", "| total_timesteps | 5314513 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.382 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 279 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5212 |\n", "| fps | 108 |\n", "| time_elapsed | 7555 |\n", "| total_timesteps | 5319599 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0293 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean | 284 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5216 |\n", "| fps | 108 |\n", "| time_elapsed | 7596 |\n", "| total_timesteps | 5324493 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0972 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean | 279 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5220 |\n", "| fps | 108 |\n", "| time_elapsed | 7635 |\n", "| 
total_timesteps | 5329199 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.112 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean | 284 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5224 |\n", "| fps | 108 |\n", "| time_elapsed | 7672 |\n", "| total_timesteps | 5333761 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0837 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean | 286 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5228 |\n", "| fps | 108 |\n", "| time_elapsed | 7705 |\n", "| total_timesteps | 5337721 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0536 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.45e+03 |\n", "| ep_rew_mean | 286 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5232 |\n", "| fps | 108 |\n", "| time_elapsed | 7741 |\n", "| total_timesteps | 5342003 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0296 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean | 286 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5236 |\n", "| fps | 108 |\n", "| time_elapsed | 7782 |\n", "| total_timesteps | 5346911 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0846 |\n", "----------------------------------\n", "Eval num_timesteps=5350000, episode_reward=398.50 +/- 28.85\n", "Episode length: 1310.00 +/- 130.03\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.31e+03 |\n", "| mean_reward | 398 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5350000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.53 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean | 289 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5240 |\n", "| fps | 108 |\n", "| time_elapsed | 7852 |\n", "| total_timesteps | 5351477 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.553 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 295 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5244 |\n", "| fps | 108 |\n", "| time_elapsed | 7900 |\n", "| total_timesteps | 5357213 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0614 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 295 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5248 |\n", "| fps | 108 |\n", "| time_elapsed | 7941 |\n", "| total_timesteps | 5362253 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0339 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 292 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5252 |\n", "| fps | 108 |\n", "| time_elapsed | 7975 |\n", "| 
total_timesteps | 5366379 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0591 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.73e+03 |\n", "| ep_rew_mean | 289 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5256 |\n", "| fps | 109 |\n", "| time_elapsed | 8221 |\n", "| total_timesteps | 5396409 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0583 |\n", "----------------------------------\n", "Eval num_timesteps=5400000, episode_reward=370.60 +/- 27.41\n", "Episode length: 1182.10 +/- 120.80\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.18e+03 |\n", "| mean_reward | 371 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5400000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.105 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.76e+03 |\n", "| ep_rew_mean | 289 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5260 |\n", "| fps | 108 |\n", "| time_elapsed | 8325 |\n", "| total_timesteps | 5405585 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0588 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.77e+03 |\n", "| ep_rew_mean | 294 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5264 |\n", "| fps | 108 |\n", "| time_elapsed | 8369 |\n", "| total_timesteps | 5410933 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0561 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.76e+03 |\n", "| ep_rew_mean | 293 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5268 |\n", "| fps | 108 |\n", "| time_elapsed | 8404 |\n", "| total_timesteps | 5415135 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0492 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.76e+03 |\n", "| ep_rew_mean | 298 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5272 |\n", "| fps | 108 |\n", "| time_elapsed | 8447 |\n", "| total_timesteps | 5420341 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0396 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.76e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5276 |\n", "| fps | 109 |\n", "| time_elapsed | 8479 |\n", "| total_timesteps | 5424295 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0449 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.75e+03 |\n", "| ep_rew_mean | 295 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5280 |\n", "| fps | 109 |\n", "| time_elapsed | 8518 |\n", "| total_timesteps | 5428987 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0455 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.74e+03 |\n", "| ep_rew_mean | 289 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5284 |\n", "| fps | 109 |\n", "| time_elapsed | 8556 |\n", "| 
total_timesteps | 5433647 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0596 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.74e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5288 |\n", "| fps | 109 |\n", "| time_elapsed | 8598 |\n", "| total_timesteps | 5438759 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.121 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.74e+03 |\n", "| ep_rew_mean | 293 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5292 |\n", "| fps | 109 |\n", "| time_elapsed | 8640 |\n", "| total_timesteps | 5443873 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0557 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.73e+03 |\n", "| ep_rew_mean | 290 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5296 |\n", "| fps | 109 |\n", "| time_elapsed | 8676 |\n", "| total_timesteps | 5448186 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.152 |\n", "----------------------------------\n", "Eval num_timesteps=5450000, episode_reward=222.50 +/- 126.86\n", "Episode length: 1182.20 +/- 298.20\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.18e+03 |\n", "| mean_reward | 222 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5450000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0782 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.74e+03 |\n", "| ep_rew_mean | 291 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5300 |\n", "| fps | 108 |\n", "| time_elapsed | 8745 |\n", "| total_timesteps | 5453102 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.105 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.73e+03 |\n", "| ep_rew_mean | 289 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5304 |\n", "| fps | 109 |\n", "| time_elapsed | 8782 |\n", "| total_timesteps | 5457599 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0799 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 292 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5308 |\n", "| fps | 109 |\n", "| time_elapsed | 8821 |\n", "| total_timesteps | 5461919 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.453 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean | 287 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5312 |\n", "| fps | 109 |\n", "| time_elapsed | 8860 |\n", "| total_timesteps | 5465915 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.201 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean | 283 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5316 |\n", "| fps | 109 |\n", "| time_elapsed | 8901 |\n", "| 
total_timesteps | 5470837 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0584 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.46e+03 |\n", "| ep_rew_mean | 284 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5320 |\n", "| fps | 109 |\n", "| time_elapsed | 8939 |\n", "| total_timesteps | 5475475 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.124 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 285 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5324 |\n", "| fps | 109 |\n", "| time_elapsed | 8979 |\n", "| total_timesteps | 5480323 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.136 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 284 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5328 |\n", "| fps | 109 |\n", "| time_elapsed | 9016 |\n", "| total_timesteps | 5484847 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.136 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.48e+03 |\n", "| ep_rew_mean | 285 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5332 |\n", "| fps | 109 |\n", "| time_elapsed | 9056 |\n", "| total_timesteps | 5489673 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 2.58 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 285 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5336 |\n", "| fps | 109 |\n", "| time_elapsed | 9091 |\n", "| total_timesteps | 5493893 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 1.28 |\n", "----------------------------------\n", "----------------------------------\n", "| rollout/ | |\n", "| ep_len_mean | 1.47e+03 |\n", "| ep_rew_mean | 285 |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| episodes | 5340 |\n", "| fps | 109 |\n", "| time_elapsed | 9130 |\n", "| total_timesteps | 5498711 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.0636 |\n", "----------------------------------\n", "Eval num_timesteps=5500000, episode_reward=418.30 +/- 110.90\n", "Episode length: 1018.20 +/- 150.28\n", "----------------------------------\n", "| eval/ | |\n", "| mean_ep_length | 1.02e+03 |\n", "| mean_reward | 418 |\n", "| rollout/ | |\n", "| exploration_rate | 0.05 |\n", "| time/ | |\n", "| total_timesteps | 5500000 |\n", "| train/ | |\n", "| learning_rate | 5e-05 |\n", "| loss | 0.079 |\n", "----------------------------------\n", "New best mean reward!\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Train the model\n", "model.learn(total_timesteps=NUM_TIMESTEPS, \n", " callback=callback_list, \n", " tb_log_name=\"./tb/\", \n", " reset_num_timesteps=False)" ] }, { "cell_type": "code", "execution_count": 9, "id": "c2906e40", "metadata": { "execution": { "iopub.execute_input": "2024-05-14T16:24:04.153120Z", "iopub.status.busy": "2024-05-14T16:24:04.152474Z", "iopub.status.idle": "2024-05-14T16:25:00.065857Z", "shell.execute_reply": "2024-05-14T16:25:00.064952Z" }, "papermill": { 
"duration": 55.941132, "end_time": "2024-05-14T16:25:00.068233", "exception": false, "start_time": "2024-05-14T16:24:04.127101", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Save the model, policy, and replay buffer for future loading and training\n", "model.save(MODEL_FILE_NAME)\n", "model.save_replay_buffer(BUFFER_FILE_NAME)\n", "model.policy.save(POLICY_FILE_NAME)" ] } ], "metadata": { "kaggle": { "accelerator": "nvidiaTeslaT4", "dataSources": [ { "sourceId": 176909235, "sourceType": "kernelVersion" } ], "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" }, "papermill": { "default_parameters": {}, "duration": 9447.284519, "end_time": "2024-05-14T16:25:04.795351", "environment_variables": {}, "exception": null, "input_path": "__notebook__.ipynb", "output_path": "__notebook__.ipynb", "parameters": {}, "start_time": "2024-05-14T13:47:37.510832", "version": "2.5.0" } }, "nbformat": 4, "nbformat_minor": 5 }