diff --git "a/sf_log.txt" "b/sf_log.txt" --- "a/sf_log.txt" +++ "b/sf_log.txt" @@ -1,50 +1,50 @@ -[2024-09-22 11:56:50,676][00564] Saving configuration to /content/train_dir/default_experiment/config.json... -[2024-09-22 11:56:50,679][00564] Rollout worker 0 uses device cpu -[2024-09-22 11:56:50,681][00564] Rollout worker 1 uses device cpu -[2024-09-22 11:56:50,682][00564] Rollout worker 2 uses device cpu -[2024-09-22 11:56:50,684][00564] Rollout worker 3 uses device cpu -[2024-09-22 11:56:50,686][00564] Rollout worker 4 uses device cpu -[2024-09-22 11:56:50,691][00564] Rollout worker 5 uses device cpu -[2024-09-22 11:56:50,692][00564] Rollout worker 6 uses device cpu -[2024-09-22 11:56:50,693][00564] Rollout worker 7 uses device cpu -[2024-09-22 11:56:50,858][00564] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-09-22 11:56:50,860][00564] InferenceWorker_p0-w0: min num requests: 2 -[2024-09-22 11:56:50,894][00564] Starting all processes... -[2024-09-22 11:56:50,895][00564] Starting process learner_proc0 -[2024-09-22 11:56:51,597][00564] Starting all processes... -[2024-09-22 11:56:51,606][00564] Starting process inference_proc0-0 -[2024-09-22 11:56:51,606][00564] Starting process rollout_proc0 -[2024-09-22 11:56:51,606][00564] Starting process rollout_proc1 -[2024-09-22 11:56:51,606][00564] Starting process rollout_proc2 -[2024-09-22 11:56:51,607][00564] Starting process rollout_proc3 -[2024-09-22 11:56:51,607][00564] Starting process rollout_proc4 -[2024-09-22 11:56:51,607][00564] Starting process rollout_proc5 -[2024-09-22 11:56:51,607][00564] Starting process rollout_proc6 -[2024-09-22 11:56:51,607][00564] Starting process rollout_proc7 -[2024-09-22 11:57:07,108][02942] Worker 3 uses CPU cores [1] -[2024-09-22 11:57:07,172][02938] Worker 0 uses CPU cores [0] -[2024-09-22 11:57:07,313][02925] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-09-22 11:57:07,313][02941] Worker 2 uses CPU cores [0] -[2024-09-22 11:57:07,313][02925] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 -[2024-09-22 11:57:07,323][02944] Worker 4 uses CPU cores [0] -[2024-09-22 11:57:07,356][02925] Num visible devices: 1 -[2024-09-22 11:57:07,363][02945] Worker 7 uses CPU cores [1] -[2024-09-22 11:57:07,384][02925] Starting seed is not provided -[2024-09-22 11:57:07,384][02925] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-09-22 11:57:07,385][02925] Initializing actor-critic model on device cuda:0 -[2024-09-22 11:57:07,386][02925] RunningMeanStd input shape: (3, 72, 128) -[2024-09-22 11:57:07,389][02925] RunningMeanStd input shape: (1,) -[2024-09-22 11:57:07,427][02925] ConvEncoder: input_channels=3 -[2024-09-22 11:57:07,436][02943] Worker 5 uses CPU cores [1] -[2024-09-22 11:57:07,454][02940] Worker 1 uses CPU cores [1] -[2024-09-22 11:57:07,472][02939] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-09-22 11:57:07,472][02939] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 -[2024-09-22 11:57:07,492][02939] Num visible devices: 1 -[2024-09-22 11:57:07,515][02946] Worker 6 uses CPU cores [0] -[2024-09-22 11:57:07,703][02925] Conv encoder output size: 512 -[2024-09-22 11:57:07,703][02925] Policy head output size: 512 -[2024-09-22 11:57:07,762][02925] Created Actor Critic model with architecture: -[2024-09-22 11:57:07,764][02925] ActorCriticSharedWeights( +[2024-09-22 15:26:35,389][00338] Saving configuration to /content/train_dir/default_experiment/config.json... +[2024-09-22 15:26:35,392][00338] Rollout worker 0 uses device cpu +[2024-09-22 15:26:35,393][00338] Rollout worker 1 uses device cpu +[2024-09-22 15:26:35,396][00338] Rollout worker 2 uses device cpu +[2024-09-22 15:26:35,400][00338] Rollout worker 3 uses device cpu +[2024-09-22 15:26:35,401][00338] Rollout worker 4 uses device cpu +[2024-09-22 15:26:35,402][00338] Rollout worker 5 uses device cpu +[2024-09-22 15:26:35,404][00338] Rollout worker 6 uses device cpu +[2024-09-22 15:26:35,408][00338] Rollout worker 7 uses device cpu +[2024-09-22 15:26:35,574][00338] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-09-22 15:26:35,577][00338] InferenceWorker_p0-w0: min num requests: 2 +[2024-09-22 15:26:35,610][00338] Starting all processes... +[2024-09-22 15:26:35,611][00338] Starting process learner_proc0 +[2024-09-22 15:26:36,309][00338] Starting all processes... +[2024-09-22 15:26:36,320][00338] Starting process inference_proc0-0 +[2024-09-22 15:26:36,321][00338] Starting process rollout_proc0 +[2024-09-22 15:26:36,326][00338] Starting process rollout_proc1 +[2024-09-22 15:26:36,326][00338] Starting process rollout_proc2 +[2024-09-22 15:26:36,326][00338] Starting process rollout_proc3 +[2024-09-22 15:26:36,326][00338] Starting process rollout_proc4 +[2024-09-22 15:26:36,326][00338] Starting process rollout_proc5 +[2024-09-22 15:26:36,326][00338] Starting process rollout_proc6 +[2024-09-22 15:26:36,326][00338] Starting process rollout_proc7 +[2024-09-22 15:26:51,985][02371] Worker 4 uses CPU cores [0] +[2024-09-22 15:26:52,373][02352] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-09-22 15:26:52,373][02352] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2024-09-22 15:26:52,387][02367] Worker 1 uses CPU cores [1] +[2024-09-22 15:26:52,435][02352] Num visible devices: 1 +[2024-09-22 15:26:52,471][02352] Starting seed is not provided +[2024-09-22 15:26:52,471][02352] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-09-22 15:26:52,471][02352] Initializing actor-critic model on device cuda:0 +[2024-09-22 15:26:52,471][02352] RunningMeanStd input shape: (3, 72, 128) +[2024-09-22 15:26:52,474][02352] RunningMeanStd input shape: (1,) +[2024-09-22 15:26:52,554][02352] ConvEncoder: input_channels=3 +[2024-09-22 15:26:52,634][02368] Worker 2 uses CPU cores [0] +[2024-09-22 15:26:52,673][02372] Worker 6 uses CPU cores [0] +[2024-09-22 15:26:52,686][02369] Worker 3 uses CPU cores [1] +[2024-09-22 15:26:52,711][02366] Worker 0 uses CPU cores [0] +[2024-09-22 15:26:52,768][02370] Worker 5 uses CPU cores [1] +[2024-09-22 15:26:52,782][02373] Worker 7 uses CPU cores [1] +[2024-09-22 15:26:52,808][02365] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-09-22 15:26:52,809][02365] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2024-09-22 15:26:52,827][02365] Num visible devices: 1 +[2024-09-22 15:26:52,895][02352] Conv encoder output size: 512 +[2024-09-22 15:26:52,895][02352] Policy head output size: 512 +[2024-09-22 15:26:52,956][02352] Created Actor Critic model with architecture: +[2024-09-22 15:26:52,956][02352] ActorCriticSharedWeights( (obs_normalizer): ObservationNormalizer( (running_mean_std): RunningMeanStdDictInPlace( (running_mean_std): ModuleDict( @@ -85,1042 +85,1887 @@ (distribution_linear): Linear(in_features=512, out_features=5, bias=True) ) ) -[2024-09-22 11:57:08,243][02925] Using optimizer -[2024-09-22 11:57:09,080][02925] No checkpoints found -[2024-09-22 11:57:09,080][02925] Did not load from checkpoint, starting from scratch! -[2024-09-22 11:57:09,080][02925] Initialized policy 0 weights for model version 0 -[2024-09-22 11:57:09,084][02925] LearnerWorker_p0 finished initialization! -[2024-09-22 11:57:09,086][02925] Using GPUs [0] for process 0 (actually maps to GPUs [0]) -[2024-09-22 11:57:09,177][02939] RunningMeanStd input shape: (3, 72, 128) -[2024-09-22 11:57:09,178][02939] RunningMeanStd input shape: (1,) -[2024-09-22 11:57:09,190][02939] ConvEncoder: input_channels=3 -[2024-09-22 11:57:09,290][02939] Conv encoder output size: 512 -[2024-09-22 11:57:09,291][02939] Policy head output size: 512 -[2024-09-22 11:57:09,342][00564] Inference worker 0-0 is ready! -[2024-09-22 11:57:09,344][00564] All inference workers are ready! Signal rollout workers to start! -[2024-09-22 11:57:09,691][02944] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-22 11:57:09,728][02941] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-22 11:57:09,739][02945] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-22 11:57:09,740][02946] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-22 11:57:09,749][02940] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-22 11:57:09,736][02938] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-22 11:57:09,762][02943] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-22 11:57:09,767][02942] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-22 11:57:10,257][00564] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-09-22 11:57:10,851][00564] Heartbeat connected on Batcher_0 -[2024-09-22 11:57:10,855][00564] Heartbeat connected on LearnerWorker_p0 -[2024-09-22 11:57:10,882][02946] Decorrelating experience for 0 frames... -[2024-09-22 11:57:10,881][02944] Decorrelating experience for 0 frames... -[2024-09-22 11:57:10,912][00564] Heartbeat connected on InferenceWorker_p0-w0 -[2024-09-22 11:57:11,110][02945] Decorrelating experience for 0 frames... -[2024-09-22 11:57:11,109][02942] Decorrelating experience for 0 frames... -[2024-09-22 11:57:11,117][02940] Decorrelating experience for 0 frames... -[2024-09-22 11:57:11,428][02944] Decorrelating experience for 32 frames... -[2024-09-22 11:57:11,939][02941] Decorrelating experience for 0 frames... -[2024-09-22 11:57:12,102][02945] Decorrelating experience for 32 frames... -[2024-09-22 11:57:12,104][02942] Decorrelating experience for 32 frames... -[2024-09-22 11:57:12,206][02944] Decorrelating experience for 64 frames... -[2024-09-22 11:57:12,718][02940] Decorrelating experience for 32 frames... -[2024-09-22 11:57:13,185][02941] Decorrelating experience for 32 frames... -[2024-09-22 11:57:13,189][02944] Decorrelating experience for 96 frames... -[2024-09-22 11:57:13,263][02943] Decorrelating experience for 0 frames... -[2024-09-22 11:57:13,382][00564] Heartbeat connected on RolloutWorker_w4 -[2024-09-22 11:57:13,831][02945] Decorrelating experience for 64 frames... -[2024-09-22 11:57:14,056][02946] Decorrelating experience for 32 frames... -[2024-09-22 11:57:14,767][02940] Decorrelating experience for 64 frames... -[2024-09-22 11:57:15,131][02941] Decorrelating experience for 64 frames... -[2024-09-22 11:57:15,260][00564] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-09-22 11:57:15,313][02943] Decorrelating experience for 32 frames... -[2024-09-22 11:57:15,378][02942] Decorrelating experience for 64 frames... -[2024-09-22 11:57:16,319][02946] Decorrelating experience for 64 frames... -[2024-09-22 11:57:17,528][02940] Decorrelating experience for 96 frames... -[2024-09-22 11:57:17,670][02945] Decorrelating experience for 96 frames... -[2024-09-22 11:57:17,847][00564] Heartbeat connected on RolloutWorker_w1 -[2024-09-22 11:57:17,973][00564] Heartbeat connected on RolloutWorker_w7 -[2024-09-22 11:57:18,021][02942] Decorrelating experience for 96 frames... -[2024-09-22 11:57:18,380][00564] Heartbeat connected on RolloutWorker_w3 -[2024-09-22 11:57:18,524][02943] Decorrelating experience for 64 frames... -[2024-09-22 11:57:19,050][02941] Decorrelating experience for 96 frames... -[2024-09-22 11:57:19,244][02946] Decorrelating experience for 96 frames... -[2024-09-22 11:57:19,409][00564] Heartbeat connected on RolloutWorker_w2 -[2024-09-22 11:57:19,803][00564] Heartbeat connected on RolloutWorker_w6 -[2024-09-22 11:57:20,259][00564] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 51.2. Samples: 512. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) -[2024-09-22 11:57:20,266][00564] Avg episode reward: [(0, '2.485')] -[2024-09-22 11:57:21,933][02925] Signal inference workers to stop experience collection... -[2024-09-22 11:57:21,926][02943] Decorrelating experience for 96 frames... -[2024-09-22 11:57:21,939][02939] InferenceWorker_p0-w0: stopping experience collection -[2024-09-22 11:57:22,012][00564] Heartbeat connected on RolloutWorker_w5 -[2024-09-22 11:57:25,247][02925] Signal inference workers to resume experience collection... -[2024-09-22 11:57:25,248][02939] InferenceWorker_p0-w0: resuming experience collection -[2024-09-22 11:57:25,257][00564] Fps is (10 sec: 409.7, 60 sec: 273.1, 300 sec: 273.1). Total num frames: 4096. Throughput: 0: 160.5. Samples: 2408. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) -[2024-09-22 11:57:25,263][00564] Avg episode reward: [(0, '3.104')] -[2024-09-22 11:57:30,257][00564] Fps is (10 sec: 2458.2, 60 sec: 1228.8, 300 sec: 1228.8). Total num frames: 24576. Throughput: 0: 212.4. Samples: 4248. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 11:57:30,259][00564] Avg episode reward: [(0, '3.946')] -[2024-09-22 11:57:34,935][02939] Updated weights for policy 0, policy_version 10 (0.0266) -[2024-09-22 11:57:35,258][00564] Fps is (10 sec: 3686.0, 60 sec: 1638.3, 300 sec: 1638.3). Total num frames: 40960. Throughput: 0: 388.8. Samples: 9720. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 11:57:35,260][00564] Avg episode reward: [(0, '4.045')] -[2024-09-22 11:57:40,257][00564] Fps is (10 sec: 3276.8, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 57344. Throughput: 0: 487.5. Samples: 14624. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 11:57:40,259][00564] Avg episode reward: [(0, '4.299')] -[2024-09-22 11:57:44,745][02939] Updated weights for policy 0, policy_version 20 (0.0034) -[2024-09-22 11:57:45,257][00564] Fps is (10 sec: 4096.3, 60 sec: 2340.5, 300 sec: 2340.5). Total num frames: 81920. Throughput: 0: 517.2. Samples: 18102. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 11:57:45,263][00564] Avg episode reward: [(0, '4.165')] -[2024-09-22 11:57:50,258][00564] Fps is (10 sec: 4505.2, 60 sec: 2559.9, 300 sec: 2559.9). Total num frames: 102400. Throughput: 0: 626.6. Samples: 25064. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 11:57:50,260][00564] Avg episode reward: [(0, '4.473')] -[2024-09-22 11:57:50,265][02925] Saving new best policy, reward=4.473! -[2024-09-22 11:57:55,257][00564] Fps is (10 sec: 3686.5, 60 sec: 2639.6, 300 sec: 2639.6). Total num frames: 118784. Throughput: 0: 650.4. Samples: 29266. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 11:57:55,259][00564] Avg episode reward: [(0, '4.616')] -[2024-09-22 11:57:55,269][02925] Saving new best policy, reward=4.616! -[2024-09-22 11:57:56,160][02939] Updated weights for policy 0, policy_version 30 (0.0039) -[2024-09-22 11:58:00,257][00564] Fps is (10 sec: 3686.7, 60 sec: 2785.3, 300 sec: 2785.3). Total num frames: 139264. Throughput: 0: 725.4. Samples: 32640. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 11:58:00,265][00564] Avg episode reward: [(0, '4.502')] -[2024-09-22 11:58:05,257][00564] Fps is (10 sec: 4096.0, 60 sec: 2904.4, 300 sec: 2904.4). Total num frames: 159744. Throughput: 0: 866.6. Samples: 39508. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 11:58:05,261][00564] Avg episode reward: [(0, '4.384')] -[2024-09-22 11:58:07,055][02939] Updated weights for policy 0, policy_version 40 (0.0037) -[2024-09-22 11:58:10,257][00564] Fps is (10 sec: 3276.8, 60 sec: 2867.2, 300 sec: 2867.2). Total num frames: 172032. Throughput: 0: 900.8. Samples: 42942. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 11:58:10,262][00564] Avg episode reward: [(0, '4.367')] -[2024-09-22 11:58:15,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3208.7, 300 sec: 2961.7). Total num frames: 192512. Throughput: 0: 921.3. Samples: 45708. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 11:58:15,260][00564] Avg episode reward: [(0, '4.323')] -[2024-09-22 11:58:17,666][02939] Updated weights for policy 0, policy_version 50 (0.0025) -[2024-09-22 11:58:20,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3618.3, 300 sec: 3101.3). Total num frames: 217088. Throughput: 0: 953.3. Samples: 52616. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 11:58:20,260][00564] Avg episode reward: [(0, '4.536')] -[2024-09-22 11:58:25,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3058.3). Total num frames: 229376. Throughput: 0: 960.7. Samples: 57854. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 11:58:25,264][00564] Avg episode reward: [(0, '4.479')] -[2024-09-22 11:58:28,912][02939] Updated weights for policy 0, policy_version 60 (0.0035) -[2024-09-22 11:58:30,262][00564] Fps is (10 sec: 3275.0, 60 sec: 3754.3, 300 sec: 3123.0). Total num frames: 249856. Throughput: 0: 931.0. Samples: 60002. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 11:58:30,265][00564] Avg episode reward: [(0, '4.339')] -[2024-09-22 11:58:35,258][00564] Fps is (10 sec: 4095.5, 60 sec: 3822.9, 300 sec: 3180.4). Total num frames: 270336. Throughput: 0: 929.7. Samples: 66900. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 11:58:35,263][00564] Avg episode reward: [(0, '4.536')] -[2024-09-22 11:58:38,537][02939] Updated weights for policy 0, policy_version 70 (0.0015) -[2024-09-22 11:58:40,257][00564] Fps is (10 sec: 4098.3, 60 sec: 3891.2, 300 sec: 3231.3). Total num frames: 290816. Throughput: 0: 967.9. Samples: 72820. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 11:58:40,261][00564] Avg episode reward: [(0, '4.467')] -[2024-09-22 11:58:45,257][00564] Fps is (10 sec: 3686.8, 60 sec: 3754.7, 300 sec: 3233.7). Total num frames: 307200. Throughput: 0: 939.1. Samples: 74898. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 11:58:45,259][00564] Avg episode reward: [(0, '4.446')] -[2024-09-22 11:58:45,268][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000075_307200.pth... -[2024-09-22 11:58:49,457][02939] Updated weights for policy 0, policy_version 80 (0.0027) -[2024-09-22 11:58:50,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3276.8). Total num frames: 327680. Throughput: 0: 923.3. Samples: 81058. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 11:58:50,261][00564] Avg episode reward: [(0, '4.487')] -[2024-09-22 11:58:55,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3315.8). Total num frames: 348160. Throughput: 0: 996.8. Samples: 87800. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 11:58:55,266][00564] Avg episode reward: [(0, '4.459')] -[2024-09-22 11:59:00,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3314.0). Total num frames: 364544. Throughput: 0: 980.8. Samples: 89842. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 11:59:00,264][00564] Avg episode reward: [(0, '4.364')] -[2024-09-22 11:59:00,862][02939] Updated weights for policy 0, policy_version 90 (0.0023) -[2024-09-22 11:59:05,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3348.0). Total num frames: 385024. Throughput: 0: 953.1. Samples: 95506. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 11:59:05,259][00564] Avg episode reward: [(0, '4.551')] -[2024-09-22 11:59:09,714][02939] Updated weights for policy 0, policy_version 100 (0.0022) -[2024-09-22 11:59:10,260][00564] Fps is (10 sec: 4504.3, 60 sec: 3959.3, 300 sec: 3413.2). Total num frames: 409600. Throughput: 0: 991.8. Samples: 102490. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 11:59:10,262][00564] Avg episode reward: [(0, '4.512')] -[2024-09-22 11:59:15,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3407.9). Total num frames: 425984. Throughput: 0: 1000.1. Samples: 105002. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 11:59:15,259][00564] Avg episode reward: [(0, '4.371')] -[2024-09-22 11:59:20,257][00564] Fps is (10 sec: 3687.5, 60 sec: 3822.9, 300 sec: 3434.3). Total num frames: 446464. Throughput: 0: 961.6. Samples: 110172. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 11:59:20,259][00564] Avg episode reward: [(0, '4.254')] -[2024-09-22 11:59:20,846][02939] Updated weights for policy 0, policy_version 110 (0.0034) -[2024-09-22 11:59:25,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3458.8). Total num frames: 466944. Throughput: 0: 986.2. Samples: 117200. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 11:59:25,261][00564] Avg episode reward: [(0, '4.360')] -[2024-09-22 11:59:30,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.8, 300 sec: 3481.6). Total num frames: 487424. Throughput: 0: 1011.2. Samples: 120400. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 11:59:30,260][00564] Avg episode reward: [(0, '4.595')] -[2024-09-22 11:59:31,758][02939] Updated weights for policy 0, policy_version 120 (0.0029) -[2024-09-22 11:59:35,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.3, 300 sec: 3474.5). Total num frames: 503808. Throughput: 0: 971.3. Samples: 124766. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 11:59:35,262][00564] Avg episode reward: [(0, '4.492')] -[2024-09-22 11:59:40,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3522.6). Total num frames: 528384. Throughput: 0: 977.2. Samples: 131772. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 11:59:40,262][00564] Avg episode reward: [(0, '4.409')] -[2024-09-22 11:59:40,933][02939] Updated weights for policy 0, policy_version 130 (0.0021) -[2024-09-22 11:59:45,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3541.1). Total num frames: 548864. Throughput: 0: 1011.6. Samples: 135366. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) -[2024-09-22 11:59:45,260][00564] Avg episode reward: [(0, '4.491')] -[2024-09-22 11:59:50,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3532.8). Total num frames: 565248. Throughput: 0: 989.2. Samples: 140018. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 11:59:50,264][00564] Avg episode reward: [(0, '4.363')] -[2024-09-22 11:59:52,153][02939] Updated weights for policy 0, policy_version 140 (0.0022) -[2024-09-22 11:59:55,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3549.9). Total num frames: 585728. Throughput: 0: 980.8. Samples: 146622. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 11:59:55,264][00564] Avg episode reward: [(0, '4.610')] -[2024-09-22 12:00:00,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3590.0). Total num frames: 610304. Throughput: 0: 1001.8. Samples: 150082. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:00:00,265][00564] Avg episode reward: [(0, '4.640')] -[2024-09-22 12:00:00,269][02925] Saving new best policy, reward=4.640! -[2024-09-22 12:00:01,784][02939] Updated weights for policy 0, policy_version 150 (0.0019) -[2024-09-22 12:00:05,257][00564] Fps is (10 sec: 3686.3, 60 sec: 3959.4, 300 sec: 3557.7). Total num frames: 622592. Throughput: 0: 1002.9. Samples: 155302. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:00:05,260][00564] Avg episode reward: [(0, '4.541')] -[2024-09-22 12:00:10,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3891.4, 300 sec: 3572.6). Total num frames: 643072. Throughput: 0: 974.4. Samples: 161050. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:00:10,261][00564] Avg episode reward: [(0, '4.869')] -[2024-09-22 12:00:10,264][02925] Saving new best policy, reward=4.869! -[2024-09-22 12:00:12,204][02939] Updated weights for policy 0, policy_version 160 (0.0024) -[2024-09-22 12:00:15,257][00564] Fps is (10 sec: 4505.8, 60 sec: 4027.7, 300 sec: 3608.9). Total num frames: 667648. Throughput: 0: 981.0. Samples: 164544. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:00:15,263][00564] Avg episode reward: [(0, '4.950')] -[2024-09-22 12:00:15,272][02925] Saving new best policy, reward=4.950! -[2024-09-22 12:00:20,257][00564] Fps is (10 sec: 4095.9, 60 sec: 3959.5, 300 sec: 3600.2). Total num frames: 684032. Throughput: 0: 1018.4. Samples: 170594. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:00:20,260][00564] Avg episode reward: [(0, '4.941')] -[2024-09-22 12:00:23,413][02939] Updated weights for policy 0, policy_version 170 (0.0040) -[2024-09-22 12:00:25,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3612.9). Total num frames: 704512. Throughput: 0: 978.4. Samples: 175802. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 12:00:25,260][00564] Avg episode reward: [(0, '4.926')] -[2024-09-22 12:00:30,257][00564] Fps is (10 sec: 4096.1, 60 sec: 3959.5, 300 sec: 3625.0). Total num frames: 724992. Throughput: 0: 974.7. Samples: 179228. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-09-22 12:00:30,260][00564] Avg episode reward: [(0, '5.280')] -[2024-09-22 12:00:30,265][02925] Saving new best policy, reward=5.280! -[2024-09-22 12:00:32,245][02939] Updated weights for policy 0, policy_version 180 (0.0025) -[2024-09-22 12:00:35,257][00564] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3636.4). Total num frames: 745472. Throughput: 0: 1016.4. Samples: 185756. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:00:35,262][00564] Avg episode reward: [(0, '5.315')] -[2024-09-22 12:00:35,272][02925] Saving new best policy, reward=5.315! -[2024-09-22 12:00:40,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3608.4). Total num frames: 757760. Throughput: 0: 959.8. Samples: 189812. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:00:40,259][00564] Avg episode reward: [(0, '5.252')] -[2024-09-22 12:00:43,855][02939] Updated weights for policy 0, policy_version 190 (0.0035) -[2024-09-22 12:00:45,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3638.8). Total num frames: 782336. Throughput: 0: 959.4. Samples: 193256. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:00:45,262][00564] Avg episode reward: [(0, '4.877')] -[2024-09-22 12:00:45,271][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000191_782336.pth... -[2024-09-22 12:00:50,257][00564] Fps is (10 sec: 4505.5, 60 sec: 3959.4, 300 sec: 3649.2). Total num frames: 802816. Throughput: 0: 993.7. Samples: 200018. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 12:00:50,263][00564] Avg episode reward: [(0, '4.896')] -[2024-09-22 12:00:55,190][02939] Updated weights for policy 0, policy_version 200 (0.0021) -[2024-09-22 12:00:55,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3640.9). Total num frames: 819200. Throughput: 0: 966.6. Samples: 204548. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:00:55,260][00564] Avg episode reward: [(0, '4.806')] -[2024-09-22 12:01:00,259][00564] Fps is (10 sec: 3685.9, 60 sec: 3822.8, 300 sec: 3650.8). Total num frames: 839680. Throughput: 0: 952.7. Samples: 207418. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:01:00,261][00564] Avg episode reward: [(0, '5.176')] -[2024-09-22 12:01:04,248][02939] Updated weights for policy 0, policy_version 210 (0.0014) -[2024-09-22 12:01:05,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3660.3). Total num frames: 860160. Throughput: 0: 971.9. Samples: 214330. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:01:05,259][00564] Avg episode reward: [(0, '5.344')] -[2024-09-22 12:01:05,286][02925] Saving new best policy, reward=5.344! -[2024-09-22 12:01:10,257][00564] Fps is (10 sec: 3687.0, 60 sec: 3891.2, 300 sec: 3652.3). Total num frames: 876544. Throughput: 0: 972.8. Samples: 219578. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:01:10,262][00564] Avg episode reward: [(0, '5.510')] -[2024-09-22 12:01:10,266][02925] Saving new best policy, reward=5.510! -[2024-09-22 12:01:15,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3661.3). Total num frames: 897024. Throughput: 0: 943.2. Samples: 221670. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:01:15,262][00564] Avg episode reward: [(0, '5.518')] -[2024-09-22 12:01:15,273][02925] Saving new best policy, reward=5.518! -[2024-09-22 12:01:15,872][02939] Updated weights for policy 0, policy_version 220 (0.0028) -[2024-09-22 12:01:20,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3686.4). Total num frames: 921600. Throughput: 0: 951.8. Samples: 228586. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:01:20,263][00564] Avg episode reward: [(0, '5.477')] -[2024-09-22 12:01:25,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3678.4). Total num frames: 937984. Throughput: 0: 996.8. Samples: 234666. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:01:25,260][00564] Avg episode reward: [(0, '5.308')] -[2024-09-22 12:01:26,171][02939] Updated weights for policy 0, policy_version 230 (0.0030) -[2024-09-22 12:01:30,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3670.6). Total num frames: 954368. Throughput: 0: 967.6. Samples: 236800. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:01:30,259][00564] Avg episode reward: [(0, '5.491')] -[2024-09-22 12:01:35,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3694.1). Total num frames: 978944. Throughput: 0: 956.0. Samples: 243036. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 12:01:35,264][00564] Avg episode reward: [(0, '5.647')] -[2024-09-22 12:01:35,274][02925] Saving new best policy, reward=5.647! -[2024-09-22 12:01:36,139][02939] Updated weights for policy 0, policy_version 240 (0.0034) -[2024-09-22 12:01:40,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3701.6). Total num frames: 999424. Throughput: 0: 1004.0. Samples: 249726. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:01:40,265][00564] Avg episode reward: [(0, '5.745')] -[2024-09-22 12:01:40,275][02925] Saving new best policy, reward=5.745! -[2024-09-22 12:01:45,260][00564] Fps is (10 sec: 3275.8, 60 sec: 3822.7, 300 sec: 3678.9). Total num frames: 1011712. Throughput: 0: 986.0. Samples: 251790. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:01:45,262][00564] Avg episode reward: [(0, '5.931')] -[2024-09-22 12:01:45,277][02925] Saving new best policy, reward=5.931! -[2024-09-22 12:01:47,541][02939] Updated weights for policy 0, policy_version 250 (0.0030) -[2024-09-22 12:01:50,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3823.0, 300 sec: 3686.4). Total num frames: 1032192. Throughput: 0: 955.9. Samples: 257346. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:01:50,264][00564] Avg episode reward: [(0, '5.986')] -[2024-09-22 12:01:50,286][02925] Saving new best policy, reward=5.986! -[2024-09-22 12:01:55,257][00564] Fps is (10 sec: 4506.9, 60 sec: 3959.5, 300 sec: 3708.0). Total num frames: 1056768. Throughput: 0: 992.6. Samples: 264246. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:01:55,263][00564] Avg episode reward: [(0, '6.101')] -[2024-09-22 12:01:55,275][02925] Saving new best policy, reward=6.101! -[2024-09-22 12:01:56,848][02939] Updated weights for policy 0, policy_version 260 (0.0025) -[2024-09-22 12:02:00,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.3, 300 sec: 3700.5). Total num frames: 1073152. Throughput: 0: 1005.0. Samples: 266894. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:02:00,260][00564] Avg episode reward: [(0, '6.421')] -[2024-09-22 12:02:00,264][02925] Saving new best policy, reward=6.421! -[2024-09-22 12:02:05,257][00564] Fps is (10 sec: 3686.3, 60 sec: 3891.2, 300 sec: 3707.2). Total num frames: 1093632. Throughput: 0: 960.5. Samples: 271810. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:02:05,262][00564] Avg episode reward: [(0, '6.368')] -[2024-09-22 12:02:07,724][02939] Updated weights for policy 0, policy_version 270 (0.0032) -[2024-09-22 12:02:10,258][00564] Fps is (10 sec: 4095.3, 60 sec: 3959.4, 300 sec: 3776.7). Total num frames: 1114112. Throughput: 0: 980.3. Samples: 278780. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:02:10,265][00564] Avg episode reward: [(0, '6.956')] -[2024-09-22 12:02:10,267][02925] Saving new best policy, reward=6.956! -[2024-09-22 12:02:15,260][00564] Fps is (10 sec: 4095.0, 60 sec: 3959.3, 300 sec: 3846.1). Total num frames: 1134592. Throughput: 0: 1004.2. Samples: 281992. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:02:15,266][00564] Avg episode reward: [(0, '6.658')] -[2024-09-22 12:02:19,027][02939] Updated weights for policy 0, policy_version 280 (0.0034) -[2024-09-22 12:02:20,257][00564] Fps is (10 sec: 3687.0, 60 sec: 3822.9, 300 sec: 3887.7). Total num frames: 1150976. Throughput: 0: 963.2. Samples: 286380. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:02:20,259][00564] Avg episode reward: [(0, '6.940')] -[2024-09-22 12:02:25,257][00564] Fps is (10 sec: 3687.3, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 1171456. Throughput: 0: 969.9. Samples: 293372. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:02:25,262][00564] Avg episode reward: [(0, '7.214')] -[2024-09-22 12:02:25,279][02925] Saving new best policy, reward=7.214! -[2024-09-22 12:02:27,887][02939] Updated weights for policy 0, policy_version 290 (0.0022) -[2024-09-22 12:02:30,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 1196032. Throughput: 0: 999.4. Samples: 296758. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:02:30,259][00564] Avg episode reward: [(0, '7.583')] -[2024-09-22 12:02:30,262][02925] Saving new best policy, reward=7.583! -[2024-09-22 12:02:35,259][00564] Fps is (10 sec: 3685.8, 60 sec: 3822.8, 300 sec: 3901.6). Total num frames: 1208320. Throughput: 0: 982.9. Samples: 301578. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:02:35,262][00564] Avg episode reward: [(0, '7.305')] -[2024-09-22 12:02:39,204][02939] Updated weights for policy 0, policy_version 300 (0.0031) -[2024-09-22 12:02:40,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 1232896. Throughput: 0: 967.5. Samples: 307782. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:02:40,260][00564] Avg episode reward: [(0, '7.090')] -[2024-09-22 12:02:45,257][00564] Fps is (10 sec: 4506.5, 60 sec: 4027.9, 300 sec: 3901.6). Total num frames: 1253376. Throughput: 0: 985.5. Samples: 311242. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-09-22 12:02:45,270][00564] Avg episode reward: [(0, '7.411')] -[2024-09-22 12:02:45,279][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000306_1253376.pth... -[2024-09-22 12:02:45,449][02925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000075_307200.pth -[2024-09-22 12:02:50,258][00564] Fps is (10 sec: 3276.5, 60 sec: 3891.1, 300 sec: 3887.7). Total num frames: 1265664. Throughput: 0: 991.4. Samples: 316426. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:02:50,261][00564] Avg episode reward: [(0, '7.698')] -[2024-09-22 12:02:50,268][02925] Saving new best policy, reward=7.698! -[2024-09-22 12:02:50,559][02939] Updated weights for policy 0, policy_version 310 (0.0013) -[2024-09-22 12:02:55,262][00564] Fps is (10 sec: 3684.6, 60 sec: 3890.9, 300 sec: 3901.5). Total num frames: 1290240. Throughput: 0: 964.1. Samples: 322168. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:02:55,268][00564] Avg episode reward: [(0, '8.189')] -[2024-09-22 12:02:55,275][02925] Saving new best policy, reward=8.189! -[2024-09-22 12:02:59,503][02939] Updated weights for policy 0, policy_version 320 (0.0044) -[2024-09-22 12:03:00,257][00564] Fps is (10 sec: 4506.1, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 1310720. Throughput: 0: 970.9. Samples: 325680. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 12:03:00,259][00564] Avg episode reward: [(0, '8.694')] -[2024-09-22 12:03:00,266][02925] Saving new best policy, reward=8.694! -[2024-09-22 12:03:05,258][00564] Fps is (10 sec: 3687.9, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 1327104. Throughput: 0: 1002.2. Samples: 331482. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:03:05,263][00564] Avg episode reward: [(0, '8.634')] -[2024-09-22 12:03:10,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3823.0, 300 sec: 3901.6). Total num frames: 1343488. Throughput: 0: 942.1. Samples: 335766. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:03:10,264][00564] Avg episode reward: [(0, '8.335')] -[2024-09-22 12:03:11,610][02939] Updated weights for policy 0, policy_version 330 (0.0030) -[2024-09-22 12:03:15,257][00564] Fps is (10 sec: 3686.7, 60 sec: 3823.1, 300 sec: 3887.7). Total num frames: 1363968. Throughput: 0: 943.5. Samples: 339214. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:03:15,263][00564] Avg episode reward: [(0, '8.924')] -[2024-09-22 12:03:15,346][02925] Saving new best policy, reward=8.924! -[2024-09-22 12:03:20,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 1384448. Throughput: 0: 978.8. Samples: 345624. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:03:20,263][00564] Avg episode reward: [(0, '8.853')] -[2024-09-22 12:03:22,797][02939] Updated weights for policy 0, policy_version 340 (0.0023) -[2024-09-22 12:03:25,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3823.0, 300 sec: 3901.7). Total num frames: 1400832. Throughput: 0: 933.6. Samples: 349794. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:03:25,265][00564] Avg episode reward: [(0, '9.448')] -[2024-09-22 12:03:25,272][02925] Saving new best policy, reward=9.448! -[2024-09-22 12:03:30,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 1421312. Throughput: 0: 927.8. Samples: 352994. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:03:30,265][00564] Avg episode reward: [(0, '8.878')] -[2024-09-22 12:03:32,301][02939] Updated weights for policy 0, policy_version 350 (0.0023) -[2024-09-22 12:03:35,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.3, 300 sec: 3901.6). Total num frames: 1441792. Throughput: 0: 967.0. Samples: 359942. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) -[2024-09-22 12:03:35,259][00564] Avg episode reward: [(0, '8.925')] -[2024-09-22 12:03:40,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 1458176. Throughput: 0: 945.4. Samples: 364706. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:03:40,259][00564] Avg episode reward: [(0, '8.640')] -[2024-09-22 12:03:44,179][02939] Updated weights for policy 0, policy_version 360 (0.0014) -[2024-09-22 12:03:45,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 1478656. Throughput: 0: 919.9. Samples: 367076. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:03:45,259][00564] Avg episode reward: [(0, '9.323')] -[2024-09-22 12:03:50,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.3, 300 sec: 3901.6). Total num frames: 1499136. Throughput: 0: 937.8. Samples: 373684. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:03:50,260][00564] Avg episode reward: [(0, '9.939')] -[2024-09-22 12:03:50,262][02925] Saving new best policy, reward=9.939! -[2024-09-22 12:03:54,777][02939] Updated weights for policy 0, policy_version 370 (0.0030) -[2024-09-22 12:03:55,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3755.0, 300 sec: 3901.6). Total num frames: 1515520. Throughput: 0: 961.8. Samples: 379048. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:03:55,259][00564] Avg episode reward: [(0, '10.556')] -[2024-09-22 12:03:55,273][02925] Saving new best policy, reward=10.556! -[2024-09-22 12:04:00,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3887.7). Total num frames: 1531904. Throughput: 0: 927.9. Samples: 380970. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:04:00,261][00564] Avg episode reward: [(0, '10.915')] -[2024-09-22 12:04:00,264][02925] Saving new best policy, reward=10.915! -[2024-09-22 12:04:05,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3873.9). Total num frames: 1552384. Throughput: 0: 923.1. Samples: 387162. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) -[2024-09-22 12:04:05,263][00564] Avg episode reward: [(0, '10.942')] -[2024-09-22 12:04:05,272][02925] Saving new best policy, reward=10.942! -[2024-09-22 12:04:05,549][02939] Updated weights for policy 0, policy_version 380 (0.0041) -[2024-09-22 12:04:10,258][00564] Fps is (10 sec: 4095.4, 60 sec: 3822.8, 300 sec: 3887.7). Total num frames: 1572864. Throughput: 0: 972.0. Samples: 393534. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:04:10,263][00564] Avg episode reward: [(0, '9.896')] -[2024-09-22 12:04:15,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3873.8). Total num frames: 1589248. Throughput: 0: 948.4. Samples: 395670. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:04:15,262][00564] Avg episode reward: [(0, '9.865')] -[2024-09-22 12:04:16,911][02939] Updated weights for policy 0, policy_version 390 (0.0034) -[2024-09-22 12:04:20,257][00564] Fps is (10 sec: 3687.0, 60 sec: 3754.7, 300 sec: 3873.8). Total num frames: 1609728. Throughput: 0: 923.4. Samples: 401494. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:04:20,262][00564] Avg episode reward: [(0, '9.747')] -[2024-09-22 12:04:25,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 1634304. Throughput: 0: 974.5. Samples: 408560. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:04:25,260][00564] Avg episode reward: [(0, '10.459')] -[2024-09-22 12:04:25,942][02939] Updated weights for policy 0, policy_version 400 (0.0019) -[2024-09-22 12:04:30,261][00564] Fps is (10 sec: 3684.9, 60 sec: 3754.4, 300 sec: 3873.8). Total num frames: 1646592. Throughput: 0: 977.0. Samples: 411044. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:04:30,263][00564] Avg episode reward: [(0, '10.897')] -[2024-09-22 12:04:35,257][00564] Fps is (10 sec: 3686.3, 60 sec: 3822.9, 300 sec: 3873.8). Total num frames: 1671168. Throughput: 0: 944.3. Samples: 416176. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:04:35,260][00564] Avg episode reward: [(0, '11.721')] -[2024-09-22 12:04:35,269][02925] Saving new best policy, reward=11.721! -[2024-09-22 12:04:36,955][02939] Updated weights for policy 0, policy_version 410 (0.0026) -[2024-09-22 12:04:40,257][00564] Fps is (10 sec: 4507.3, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 1691648. Throughput: 0: 978.3. Samples: 423070. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:04:40,260][00564] Avg episode reward: [(0, '12.566')] -[2024-09-22 12:04:40,264][02925] Saving new best policy, reward=12.566! -[2024-09-22 12:04:45,260][00564] Fps is (10 sec: 3685.5, 60 sec: 3822.8, 300 sec: 3873.8). Total num frames: 1708032. Throughput: 0: 1003.9. Samples: 426148. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:04:45,265][00564] Avg episode reward: [(0, '12.629')] -[2024-09-22 12:04:45,277][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000417_1708032.pth... -[2024-09-22 12:04:45,414][02925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000191_782336.pth -[2024-09-22 12:04:45,430][02925] Saving new best policy, reward=12.629! -[2024-09-22 12:04:48,474][02939] Updated weights for policy 0, policy_version 420 (0.0029) -[2024-09-22 12:04:50,257][00564] Fps is (10 sec: 3686.5, 60 sec: 3822.9, 300 sec: 3873.8). Total num frames: 1728512. Throughput: 0: 963.4. Samples: 430516. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:04:50,262][00564] Avg episode reward: [(0, '13.551')] -[2024-09-22 12:04:50,265][02925] Saving new best policy, reward=13.551! -[2024-09-22 12:04:55,257][00564] Fps is (10 sec: 4097.1, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 1748992. Throughput: 0: 977.2. Samples: 437508. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:04:55,259][00564] Avg episode reward: [(0, '13.036')] -[2024-09-22 12:04:57,234][02939] Updated weights for policy 0, policy_version 430 (0.0032) -[2024-09-22 12:05:00,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3887.7). Total num frames: 1769472. Throughput: 0: 1007.2. Samples: 440996. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) -[2024-09-22 12:05:00,259][00564] Avg episode reward: [(0, '12.661')] -[2024-09-22 12:05:05,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 1785856. Throughput: 0: 978.4. Samples: 445520. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:05:05,259][00564] Avg episode reward: [(0, '13.377')] -[2024-09-22 12:05:08,513][02939] Updated weights for policy 0, policy_version 440 (0.0021) -[2024-09-22 12:05:10,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.3, 300 sec: 3860.0). Total num frames: 1806336. Throughput: 0: 962.4. Samples: 451870. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:05:10,260][00564] Avg episode reward: [(0, '13.118')] -[2024-09-22 12:05:15,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3887.7). Total num frames: 1830912. Throughput: 0: 985.4. Samples: 455384. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:05:15,260][00564] Avg episode reward: [(0, '13.357')] -[2024-09-22 12:05:19,248][02939] Updated weights for policy 0, policy_version 450 (0.0029) -[2024-09-22 12:05:20,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 1843200. Throughput: 0: 988.4. Samples: 460652. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:05:20,259][00564] Avg episode reward: [(0, '13.531')] -[2024-09-22 12:05:25,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 1867776. Throughput: 0: 964.1. Samples: 466456. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:05:25,260][00564] Avg episode reward: [(0, '13.682')] -[2024-09-22 12:05:25,268][02925] Saving new best policy, reward=13.682! -[2024-09-22 12:05:28,604][02939] Updated weights for policy 0, policy_version 460 (0.0023) -[2024-09-22 12:05:30,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4028.0, 300 sec: 3873.8). Total num frames: 1888256. Throughput: 0: 973.3. Samples: 469946. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:05:30,260][00564] Avg episode reward: [(0, '14.512')] -[2024-09-22 12:05:30,263][02925] Saving new best policy, reward=14.512! -[2024-09-22 12:05:35,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 1904640. Throughput: 0: 1006.6. Samples: 475814. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:05:35,259][00564] Avg episode reward: [(0, '14.679')] -[2024-09-22 12:05:35,268][02925] Saving new best policy, reward=14.679! -[2024-09-22 12:05:40,197][02939] Updated weights for policy 0, policy_version 470 (0.0026) -[2024-09-22 12:05:40,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 1925120. Throughput: 0: 962.4. Samples: 480814. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:05:40,268][00564] Avg episode reward: [(0, '15.380')] -[2024-09-22 12:05:40,271][02925] Saving new best policy, reward=15.380! -[2024-09-22 12:05:45,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.6, 300 sec: 3873.8). Total num frames: 1945600. Throughput: 0: 960.6. Samples: 484224. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:05:45,259][00564] Avg episode reward: [(0, '15.162')] -[2024-09-22 12:05:50,258][00564] Fps is (10 sec: 3686.0, 60 sec: 3891.1, 300 sec: 3873.8). Total num frames: 1961984. Throughput: 0: 1000.3. Samples: 490536. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:05:50,263][00564] Avg episode reward: [(0, '15.142')] -[2024-09-22 12:05:50,868][02939] Updated weights for policy 0, policy_version 480 (0.0049) -[2024-09-22 12:05:55,257][00564] Fps is (10 sec: 3276.7, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 1978368. Throughput: 0: 950.3. Samples: 494636. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 12:05:55,264][00564] Avg episode reward: [(0, '15.213')] -[2024-09-22 12:06:00,257][00564] Fps is (10 sec: 4096.5, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 2002944. Throughput: 0: 946.0. Samples: 497954. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:06:00,263][00564] Avg episode reward: [(0, '15.717')] -[2024-09-22 12:06:00,266][02925] Saving new best policy, reward=15.717! -[2024-09-22 12:06:01,423][02939] Updated weights for policy 0, policy_version 490 (0.0022) -[2024-09-22 12:06:05,257][00564] Fps is (10 sec: 4096.2, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 2019328. Throughput: 0: 977.3. Samples: 504632. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:06:05,262][00564] Avg episode reward: [(0, '14.790')] -[2024-09-22 12:06:10,257][00564] Fps is (10 sec: 3276.7, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 2035712. Throughput: 0: 945.5. Samples: 509002. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:06:10,263][00564] Avg episode reward: [(0, '14.975')] -[2024-09-22 12:06:12,794][02939] Updated weights for policy 0, policy_version 500 (0.0029) -[2024-09-22 12:06:15,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 2056192. Throughput: 0: 933.4. Samples: 511950. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:06:15,263][00564] Avg episode reward: [(0, '15.269')] -[2024-09-22 12:06:20,257][00564] Fps is (10 sec: 4096.2, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 2076672. Throughput: 0: 946.6. Samples: 518410. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:06:20,261][00564] Avg episode reward: [(0, '15.993')] -[2024-09-22 12:06:20,287][02925] Saving new best policy, reward=15.993! -[2024-09-22 12:06:23,520][02939] Updated weights for policy 0, policy_version 510 (0.0049) -[2024-09-22 12:06:25,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 2093056. Throughput: 0: 942.8. Samples: 523240. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 12:06:25,264][00564] Avg episode reward: [(0, '16.240')] -[2024-09-22 12:06:25,276][02925] Saving new best policy, reward=16.240! -[2024-09-22 12:06:30,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3832.2). Total num frames: 2109440. Throughput: 0: 909.2. Samples: 525138. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:06:30,260][00564] Avg episode reward: [(0, '17.546')] -[2024-09-22 12:06:30,265][02925] Saving new best policy, reward=17.546! -[2024-09-22 12:06:34,707][02939] Updated weights for policy 0, policy_version 520 (0.0019) -[2024-09-22 12:06:35,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 2129920. Throughput: 0: 909.6. Samples: 531466. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:06:35,259][00564] Avg episode reward: [(0, '16.963')] -[2024-09-22 12:06:40,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3846.1). Total num frames: 2146304. Throughput: 0: 939.7. Samples: 536922. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:06:40,260][00564] Avg episode reward: [(0, '17.208')] -[2024-09-22 12:06:45,257][00564] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3818.3). Total num frames: 2158592. Throughput: 0: 908.5. Samples: 538838. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:06:45,264][00564] Avg episode reward: [(0, '16.878')] -[2024-09-22 12:06:45,272][00564] Components not started: RolloutWorker_w0, wait_time=600.0 seconds -[2024-09-22 12:06:45,288][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000528_2162688.pth... -[2024-09-22 12:06:45,417][02925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000306_1253376.pth -[2024-09-22 12:06:47,385][02939] Updated weights for policy 0, policy_version 530 (0.0020) -[2024-09-22 12:06:50,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3686.5, 300 sec: 3818.3). Total num frames: 2183168. Throughput: 0: 881.3. Samples: 544290. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 12:06:50,260][00564] Avg episode reward: [(0, '17.517')] -[2024-09-22 12:06:55,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3818.3). Total num frames: 2199552. Throughput: 0: 925.5. Samples: 550650. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 12:06:55,264][00564] Avg episode reward: [(0, '18.107')] -[2024-09-22 12:06:55,275][02925] Saving new best policy, reward=18.107! -[2024-09-22 12:06:58,757][02939] Updated weights for policy 0, policy_version 540 (0.0029) -[2024-09-22 12:07:00,257][00564] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3790.5). Total num frames: 2211840. Throughput: 0: 899.8. Samples: 552442. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:07:00,259][00564] Avg episode reward: [(0, '18.823')] -[2024-09-22 12:07:00,266][02925] Saving new best policy, reward=18.823! -[2024-09-22 12:07:05,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3790.6). Total num frames: 2232320. Throughput: 0: 863.1. Samples: 557248. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 12:07:05,261][00564] Avg episode reward: [(0, '19.792')] -[2024-09-22 12:07:05,272][02925] Saving new best policy, reward=19.792! -[2024-09-22 12:07:09,444][02939] Updated weights for policy 0, policy_version 550 (0.0024) -[2024-09-22 12:07:10,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3618.2, 300 sec: 3790.6). Total num frames: 2252800. Throughput: 0: 894.9. Samples: 563512. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:07:10,264][00564] Avg episode reward: [(0, '20.138')] -[2024-09-22 12:07:10,267][02925] Saving new best policy, reward=20.138! -[2024-09-22 12:07:15,259][00564] Fps is (10 sec: 3685.8, 60 sec: 3549.8, 300 sec: 3790.5). Total num frames: 2269184. Throughput: 0: 909.3. Samples: 566060. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:07:15,264][00564] Avg episode reward: [(0, '19.187')] -[2024-09-22 12:07:20,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3776.7). Total num frames: 2285568. Throughput: 0: 860.0. Samples: 570166. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:07:20,260][00564] Avg episode reward: [(0, '21.074')] -[2024-09-22 12:07:20,261][02925] Saving new best policy, reward=21.074! -[2024-09-22 12:07:21,863][02939] Updated weights for policy 0, policy_version 560 (0.0026) -[2024-09-22 12:07:25,257][00564] Fps is (10 sec: 3687.0, 60 sec: 3549.9, 300 sec: 3762.8). Total num frames: 2306048. Throughput: 0: 879.8. Samples: 576512. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:07:25,259][00564] Avg episode reward: [(0, '19.628')] -[2024-09-22 12:07:30,259][00564] Fps is (10 sec: 3685.8, 60 sec: 3549.8, 300 sec: 3776.7). Total num frames: 2322432. Throughput: 0: 911.4. Samples: 579852. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:07:30,261][00564] Avg episode reward: [(0, '20.178')] -[2024-09-22 12:07:33,257][02939] Updated weights for policy 0, policy_version 570 (0.0023) -[2024-09-22 12:07:35,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3748.9). Total num frames: 2338816. Throughput: 0: 883.2. Samples: 584032. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:07:35,260][00564] Avg episode reward: [(0, '19.354')] -[2024-09-22 12:07:40,257][00564] Fps is (10 sec: 3687.0, 60 sec: 3549.9, 300 sec: 3748.9). Total num frames: 2359296. Throughput: 0: 882.0. Samples: 590338. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:07:40,263][00564] Avg episode reward: [(0, '20.230')] -[2024-09-22 12:07:43,000][02939] Updated weights for policy 0, policy_version 580 (0.0032) -[2024-09-22 12:07:45,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3790.5). Total num frames: 2383872. Throughput: 0: 916.1. Samples: 593668. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:07:45,259][00564] Avg episode reward: [(0, '19.011')] -[2024-09-22 12:07:50,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3748.9). Total num frames: 2396160. Throughput: 0: 923.5. Samples: 598806. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:07:50,259][00564] Avg episode reward: [(0, '19.385')] -[2024-09-22 12:07:54,207][02939] Updated weights for policy 0, policy_version 590 (0.0043) -[2024-09-22 12:07:55,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 2420736. Throughput: 0: 914.7. Samples: 604674. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:07:55,262][00564] Avg episode reward: [(0, '18.563')] -[2024-09-22 12:08:00,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 2441216. Throughput: 0: 936.2. Samples: 608186. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-09-22 12:08:00,260][00564] Avg episode reward: [(0, '17.560')] -[2024-09-22 12:08:04,563][02939] Updated weights for policy 0, policy_version 600 (0.0020) -[2024-09-22 12:08:05,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 2457600. Throughput: 0: 972.7. Samples: 613936. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:08:05,259][00564] Avg episode reward: [(0, '17.569')] -[2024-09-22 12:08:10,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 2478080. Throughput: 0: 941.0. Samples: 618858. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:08:10,260][00564] Avg episode reward: [(0, '18.019')] -[2024-09-22 12:08:15,125][02939] Updated weights for policy 0, policy_version 610 (0.0021) -[2024-09-22 12:08:15,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3823.0, 300 sec: 3776.7). Total num frames: 2498560. Throughput: 0: 940.0. Samples: 622150. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:08:15,261][00564] Avg episode reward: [(0, '17.417')] -[2024-09-22 12:08:20,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3776.6). Total num frames: 2514944. Throughput: 0: 989.6. Samples: 628566. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 12:08:20,261][00564] Avg episode reward: [(0, '18.545')] -[2024-09-22 12:08:25,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 2531328. Throughput: 0: 946.9. Samples: 632948. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:08:25,259][00564] Avg episode reward: [(0, '18.794')] -[2024-09-22 12:08:26,419][02939] Updated weights for policy 0, policy_version 620 (0.0021) -[2024-09-22 12:08:30,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.3, 300 sec: 3776.7). Total num frames: 2555904. Throughput: 0: 951.4. Samples: 636480. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) -[2024-09-22 12:08:30,259][00564] Avg episode reward: [(0, '18.973')] -[2024-09-22 12:08:35,278][00564] Fps is (10 sec: 4496.2, 60 sec: 3958.1, 300 sec: 3790.3). Total num frames: 2576384. Throughput: 0: 985.3. Samples: 643164. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:08:35,281][00564] Avg episode reward: [(0, '20.161')] -[2024-09-22 12:08:36,532][02939] Updated weights for policy 0, policy_version 630 (0.0040) -[2024-09-22 12:08:40,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 2588672. Throughput: 0: 953.2. Samples: 647566. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 12:08:40,261][00564] Avg episode reward: [(0, '21.471')] -[2024-09-22 12:08:40,265][02925] Saving new best policy, reward=21.471! -[2024-09-22 12:08:45,257][00564] Fps is (10 sec: 3283.7, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 2609152. Throughput: 0: 938.7. Samples: 650428. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:08:45,259][00564] Avg episode reward: [(0, '22.327')] -[2024-09-22 12:08:45,277][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000637_2609152.pth... -[2024-09-22 12:08:45,406][02925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000417_1708032.pth -[2024-09-22 12:08:45,424][02925] Saving new best policy, reward=22.327! -[2024-09-22 12:08:47,217][02939] Updated weights for policy 0, policy_version 640 (0.0017) -[2024-09-22 12:08:50,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3790.5). Total num frames: 2633728. Throughput: 0: 963.8. Samples: 657308. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:08:50,259][00564] Avg episode reward: [(0, '22.854')] -[2024-09-22 12:08:50,266][02925] Saving new best policy, reward=22.854! -[2024-09-22 12:08:55,259][00564] Fps is (10 sec: 4095.2, 60 sec: 3822.8, 300 sec: 3790.5). Total num frames: 2650112. Throughput: 0: 971.6. Samples: 662580. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:08:55,261][00564] Avg episode reward: [(0, '21.214')] -[2024-09-22 12:08:58,242][02939] Updated weights for policy 0, policy_version 650 (0.0013) -[2024-09-22 12:09:00,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 2670592. Throughput: 0: 950.8. Samples: 664936. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:09:00,261][00564] Avg episode reward: [(0, '20.250')] -[2024-09-22 12:09:05,257][00564] Fps is (10 sec: 4506.5, 60 sec: 3959.5, 300 sec: 3804.4). Total num frames: 2695168. Throughput: 0: 966.6. Samples: 672064. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:09:05,264][00564] Avg episode reward: [(0, '18.547')] -[2024-09-22 12:09:06,798][02939] Updated weights for policy 0, policy_version 660 (0.0014) -[2024-09-22 12:09:10,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3804.4). Total num frames: 2711552. Throughput: 0: 1001.2. Samples: 678002. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) -[2024-09-22 12:09:10,263][00564] Avg episode reward: [(0, '19.469')] -[2024-09-22 12:09:15,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 2727936. Throughput: 0: 967.5. Samples: 680018. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:09:15,261][00564] Avg episode reward: [(0, '19.685')] -[2024-09-22 12:09:18,478][02939] Updated weights for policy 0, policy_version 670 (0.0021) -[2024-09-22 12:09:20,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 2748416. Throughput: 0: 961.4. Samples: 686406. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:09:20,259][00564] Avg episode reward: [(0, '18.393')] -[2024-09-22 12:09:25,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3804.5). Total num frames: 2768896. Throughput: 0: 1001.7. Samples: 692644. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:09:25,265][00564] Avg episode reward: [(0, '18.293')] -[2024-09-22 12:09:30,157][02939] Updated weights for policy 0, policy_version 680 (0.0032) -[2024-09-22 12:09:30,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 2785280. Throughput: 0: 982.4. Samples: 694636. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:09:30,261][00564] Avg episode reward: [(0, '19.445')] -[2024-09-22 12:09:35,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3824.3, 300 sec: 3776.7). Total num frames: 2805760. Throughput: 0: 961.1. Samples: 700556. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:09:35,259][00564] Avg episode reward: [(0, '20.040')] -[2024-09-22 12:09:38,944][02939] Updated weights for policy 0, policy_version 690 (0.0018) -[2024-09-22 12:09:40,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3804.5). Total num frames: 2830336. Throughput: 0: 999.8. Samples: 707570. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 12:09:40,263][00564] Avg episode reward: [(0, '21.272')] -[2024-09-22 12:09:45,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3776.6). Total num frames: 2842624. Throughput: 0: 998.4. Samples: 709864. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:09:45,260][00564] Avg episode reward: [(0, '22.525')] -[2024-09-22 12:09:50,257][00564] Fps is (10 sec: 3276.7, 60 sec: 3822.9, 300 sec: 3776.6). Total num frames: 2863104. Throughput: 0: 953.6. Samples: 714974. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:09:50,259][00564] Avg episode reward: [(0, '24.205')] -[2024-09-22 12:09:50,328][02925] Saving new best policy, reward=24.205! -[2024-09-22 12:09:50,335][02939] Updated weights for policy 0, policy_version 700 (0.0016) -[2024-09-22 12:09:55,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3959.6, 300 sec: 3790.5). Total num frames: 2887680. Throughput: 0: 976.8. Samples: 721956. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:09:55,260][00564] Avg episode reward: [(0, '24.621')] -[2024-09-22 12:09:55,268][02925] Saving new best policy, reward=24.621! -[2024-09-22 12:10:00,257][00564] Fps is (10 sec: 4096.1, 60 sec: 3891.2, 300 sec: 3790.5). Total num frames: 2904064. Throughput: 0: 1001.9. Samples: 725104. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:10:00,263][00564] Avg episode reward: [(0, '24.019')] -[2024-09-22 12:10:00,636][02939] Updated weights for policy 0, policy_version 710 (0.0032) -[2024-09-22 12:10:05,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 2924544. Throughput: 0: 958.2. Samples: 729526. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:10:05,262][00564] Avg episode reward: [(0, '23.558')] -[2024-09-22 12:10:10,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 2945024. Throughput: 0: 972.2. Samples: 736392. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:10:10,260][00564] Avg episode reward: [(0, '23.822')] -[2024-09-22 12:10:10,510][02939] Updated weights for policy 0, policy_version 720 (0.0018) -[2024-09-22 12:10:15,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3804.4). Total num frames: 2965504. Throughput: 0: 1002.2. Samples: 739736. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:10:15,263][00564] Avg episode reward: [(0, '23.137')] -[2024-09-22 12:10:20,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 2977792. Throughput: 0: 968.9. Samples: 744158. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:10:20,262][00564] Avg episode reward: [(0, '23.408')] -[2024-09-22 12:10:22,225][02939] Updated weights for policy 0, policy_version 730 (0.0022) -[2024-09-22 12:10:25,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3776.6). Total num frames: 3002368. Throughput: 0: 955.5. Samples: 750566. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:10:25,262][00564] Avg episode reward: [(0, '21.859')] -[2024-09-22 12:10:30,261][00564] Fps is (10 sec: 4503.8, 60 sec: 3959.2, 300 sec: 3790.5). Total num frames: 3022848. Throughput: 0: 981.4. Samples: 754030. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:10:30,263][00564] Avg episode reward: [(0, '21.459')] -[2024-09-22 12:10:31,866][02939] Updated weights for policy 0, policy_version 740 (0.0018) -[2024-09-22 12:10:35,262][00564] Fps is (10 sec: 3684.6, 60 sec: 3890.9, 300 sec: 3776.6). Total num frames: 3039232. Throughput: 0: 984.5. Samples: 759280. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:10:35,267][00564] Avg episode reward: [(0, '20.418')] -[2024-09-22 12:10:40,257][00564] Fps is (10 sec: 3687.8, 60 sec: 3822.9, 300 sec: 3776.6). Total num frames: 3059712. Throughput: 0: 959.7. Samples: 765142. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:10:40,265][00564] Avg episode reward: [(0, '19.682')] -[2024-09-22 12:10:42,389][02939] Updated weights for policy 0, policy_version 750 (0.0023) -[2024-09-22 12:10:45,257][00564] Fps is (10 sec: 4507.7, 60 sec: 4027.7, 300 sec: 3804.4). Total num frames: 3084288. Throughput: 0: 966.0. Samples: 768574. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) -[2024-09-22 12:10:45,260][00564] Avg episode reward: [(0, '20.476')] -[2024-09-22 12:10:45,272][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000753_3084288.pth... -[2024-09-22 12:10:45,393][02925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000528_2162688.pth -[2024-09-22 12:10:50,257][00564] Fps is (10 sec: 4096.1, 60 sec: 3959.5, 300 sec: 3804.4). Total num frames: 3100672. Throughput: 0: 997.7. Samples: 774422. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:10:50,259][00564] Avg episode reward: [(0, '21.444')] -[2024-09-22 12:10:53,759][02939] Updated weights for policy 0, policy_version 760 (0.0023) -[2024-09-22 12:10:55,262][00564] Fps is (10 sec: 3275.3, 60 sec: 3822.6, 300 sec: 3776.6). Total num frames: 3117056. Throughput: 0: 958.2. Samples: 779514. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:10:55,270][00564] Avg episode reward: [(0, '21.795')] -[2024-09-22 12:11:00,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3804.4). Total num frames: 3141632. Throughput: 0: 958.8. Samples: 782882. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) -[2024-09-22 12:11:00,263][00564] Avg episode reward: [(0, '21.647')] -[2024-09-22 12:11:02,626][02939] Updated weights for policy 0, policy_version 770 (0.0027) -[2024-09-22 12:11:05,260][00564] Fps is (10 sec: 4096.9, 60 sec: 3891.0, 300 sec: 3804.4). Total num frames: 3158016. Throughput: 0: 1001.8. Samples: 789240. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:11:05,265][00564] Avg episode reward: [(0, '21.520')] -[2024-09-22 12:11:10,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 3174400. Throughput: 0: 958.2. Samples: 793684. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:11:10,262][00564] Avg episode reward: [(0, '21.675')] -[2024-09-22 12:11:14,201][02939] Updated weights for policy 0, policy_version 780 (0.0027) -[2024-09-22 12:11:15,257][00564] Fps is (10 sec: 3687.4, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 3194880. Throughput: 0: 956.6. Samples: 797072. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 12:11:15,259][00564] Avg episode reward: [(0, '21.778')] -[2024-09-22 12:11:20,263][00564] Fps is (10 sec: 4503.0, 60 sec: 4027.3, 300 sec: 3818.2). Total num frames: 3219456. Throughput: 0: 994.2. Samples: 804022. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:11:20,265][00564] Avg episode reward: [(0, '22.753')] -[2024-09-22 12:11:25,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3804.4). Total num frames: 3231744. Throughput: 0: 965.1. Samples: 808572. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:11:25,262][00564] Avg episode reward: [(0, '23.360')] -[2024-09-22 12:11:25,290][02939] Updated weights for policy 0, policy_version 790 (0.0026) -[2024-09-22 12:11:30,259][00564] Fps is (10 sec: 3687.8, 60 sec: 3891.3, 300 sec: 3818.3). Total num frames: 3256320. Throughput: 0: 960.4. Samples: 811794. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:11:30,268][00564] Avg episode reward: [(0, '23.530')] -[2024-09-22 12:11:34,070][02939] Updated weights for policy 0, policy_version 800 (0.0018) -[2024-09-22 12:11:35,258][00564] Fps is (10 sec: 4914.7, 60 sec: 4028.0, 300 sec: 3846.1). Total num frames: 3280896. Throughput: 0: 987.8. Samples: 818876. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:11:35,263][00564] Avg episode reward: [(0, '23.236')] -[2024-09-22 12:11:40,257][00564] Fps is (10 sec: 3687.1, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 3293184. Throughput: 0: 990.5. Samples: 824080. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:11:40,263][00564] Avg episode reward: [(0, '23.545')] -[2024-09-22 12:11:45,257][00564] Fps is (10 sec: 3277.2, 60 sec: 3823.0, 300 sec: 3832.2). Total num frames: 3313664. Throughput: 0: 970.8. Samples: 826566. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:11:45,265][00564] Avg episode reward: [(0, '23.825')] -[2024-09-22 12:11:45,268][02939] Updated weights for policy 0, policy_version 810 (0.0025) -[2024-09-22 12:11:50,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 3338240. Throughput: 0: 985.6. Samples: 833588. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:11:50,261][00564] Avg episode reward: [(0, '23.526')] -[2024-09-22 12:11:55,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.8, 300 sec: 3873.8). Total num frames: 3354624. Throughput: 0: 1017.1. Samples: 839452. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:11:55,260][00564] Avg episode reward: [(0, '24.016')] -[2024-09-22 12:11:55,482][02939] Updated weights for policy 0, policy_version 820 (0.0028) -[2024-09-22 12:12:00,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 3375104. Throughput: 0: 990.1. Samples: 841628. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:12:00,260][00564] Avg episode reward: [(0, '23.877')] -[2024-09-22 12:12:05,180][02939] Updated weights for policy 0, policy_version 830 (0.0038) -[2024-09-22 12:12:05,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4027.9, 300 sec: 3887.7). Total num frames: 3399680. Throughput: 0: 986.0. Samples: 848388. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:12:05,261][00564] Avg episode reward: [(0, '23.069')] -[2024-09-22 12:12:10,258][00564] Fps is (10 sec: 4095.4, 60 sec: 4027.6, 300 sec: 3887.7). Total num frames: 3416064. Throughput: 0: 1027.7. Samples: 854822. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:12:10,261][00564] Avg episode reward: [(0, '21.703')] -[2024-09-22 12:12:15,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 3887.7). Total num frames: 3432448. Throughput: 0: 1004.8. Samples: 857010. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:12:15,259][00564] Avg episode reward: [(0, '21.238')] -[2024-09-22 12:12:16,430][02939] Updated weights for policy 0, policy_version 840 (0.0023) -[2024-09-22 12:12:20,257][00564] Fps is (10 sec: 4096.6, 60 sec: 3959.9, 300 sec: 3901.6). Total num frames: 3457024. Throughput: 0: 980.3. Samples: 862988. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:12:20,259][00564] Avg episode reward: [(0, '21.717')] -[2024-09-22 12:12:25,052][02939] Updated weights for policy 0, policy_version 850 (0.0013) -[2024-09-22 12:12:25,257][00564] Fps is (10 sec: 4915.2, 60 sec: 4164.3, 300 sec: 3929.4). Total num frames: 3481600. Throughput: 0: 1023.5. Samples: 870136. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:12:25,260][00564] Avg episode reward: [(0, '21.569')] -[2024-09-22 12:12:30,260][00564] Fps is (10 sec: 3685.3, 60 sec: 3959.4, 300 sec: 3915.5). Total num frames: 3493888. Throughput: 0: 1016.8. Samples: 872326. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:12:30,262][00564] Avg episode reward: [(0, '22.038')] -[2024-09-22 12:12:35,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3891.3, 300 sec: 3915.5). Total num frames: 3514368. Throughput: 0: 981.9. Samples: 877774. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) -[2024-09-22 12:12:35,259][00564] Avg episode reward: [(0, '23.234')] -[2024-09-22 12:12:36,153][02939] Updated weights for policy 0, policy_version 860 (0.0021) -[2024-09-22 12:12:40,257][00564] Fps is (10 sec: 4506.9, 60 sec: 4096.0, 300 sec: 3915.5). Total num frames: 3538944. Throughput: 0: 1004.9. Samples: 884674. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) -[2024-09-22 12:12:40,259][00564] Avg episode reward: [(0, '23.811')] -[2024-09-22 12:12:45,264][00564] Fps is (10 sec: 4093.2, 60 sec: 4027.3, 300 sec: 3929.3). Total num frames: 3555328. Throughput: 0: 1021.3. Samples: 887592. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 12:12:45,266][00564] Avg episode reward: [(0, '24.913')] -[2024-09-22 12:12:45,279][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000868_3555328.pth... -[2024-09-22 12:12:45,461][02925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000637_2609152.pth -[2024-09-22 12:12:45,481][02925] Saving new best policy, reward=24.913! -[2024-09-22 12:12:47,922][02939] Updated weights for policy 0, policy_version 870 (0.0034) -[2024-09-22 12:12:50,257][00564] Fps is (10 sec: 3276.7, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 3571712. Throughput: 0: 967.4. Samples: 891920. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:12:50,265][00564] Avg episode reward: [(0, '24.021')] -[2024-09-22 12:12:55,257][00564] Fps is (10 sec: 4098.8, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 3596288. Throughput: 0: 978.0. Samples: 898832. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 12:12:55,261][00564] Avg episode reward: [(0, '22.475')] -[2024-09-22 12:12:56,674][02939] Updated weights for policy 0, policy_version 880 (0.0025) -[2024-09-22 12:13:00,257][00564] Fps is (10 sec: 4096.1, 60 sec: 3959.5, 300 sec: 3915.5). Total num frames: 3612672. Throughput: 0: 1006.1. Samples: 902286. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:13:00,262][00564] Avg episode reward: [(0, '23.169')] -[2024-09-22 12:13:05,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3901.6). Total num frames: 3629056. Throughput: 0: 970.0. Samples: 906640. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:13:05,264][00564] Avg episode reward: [(0, '21.721')] -[2024-09-22 12:13:08,166][02939] Updated weights for policy 0, policy_version 890 (0.0028) -[2024-09-22 12:13:10,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.6, 300 sec: 3915.5). Total num frames: 3653632. Throughput: 0: 957.0. Samples: 913200. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:13:10,264][00564] Avg episode reward: [(0, '20.285')] -[2024-09-22 12:13:15,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3929.4). Total num frames: 3674112. Throughput: 0: 985.2. Samples: 916658. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:13:15,260][00564] Avg episode reward: [(0, '21.309')] -[2024-09-22 12:13:18,927][02939] Updated weights for policy 0, policy_version 900 (0.0023) -[2024-09-22 12:13:20,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3915.5). Total num frames: 3686400. Throughput: 0: 974.7. Samples: 921636. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:13:20,259][00564] Avg episode reward: [(0, '22.630')] -[2024-09-22 12:13:25,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3915.5). Total num frames: 3710976. Throughput: 0: 951.7. Samples: 927502. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:13:25,263][00564] Avg episode reward: [(0, '22.529')] -[2024-09-22 12:13:28,666][02939] Updated weights for policy 0, policy_version 910 (0.0016) -[2024-09-22 12:13:30,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3959.7, 300 sec: 3915.8). Total num frames: 3731456. Throughput: 0: 963.5. Samples: 930942. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) -[2024-09-22 12:13:30,260][00564] Avg episode reward: [(0, '23.620')] -[2024-09-22 12:13:35,265][00564] Fps is (10 sec: 3683.5, 60 sec: 3890.7, 300 sec: 3929.3). Total num frames: 3747840. Throughput: 0: 995.5. Samples: 936726. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:13:35,269][00564] Avg episode reward: [(0, '24.449')] -[2024-09-22 12:13:40,047][02939] Updated weights for policy 0, policy_version 920 (0.0022) -[2024-09-22 12:13:40,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3929.4). Total num frames: 3768320. Throughput: 0: 954.0. Samples: 941760. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:13:40,259][00564] Avg episode reward: [(0, '23.882')] -[2024-09-22 12:13:45,257][00564] Fps is (10 sec: 4099.2, 60 sec: 3891.6, 300 sec: 3915.5). Total num frames: 3788800. Throughput: 0: 951.8. Samples: 945118. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:13:45,259][00564] Avg episode reward: [(0, '23.351')] -[2024-09-22 12:13:50,258][00564] Fps is (10 sec: 3686.1, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 3805184. Throughput: 0: 990.2. Samples: 951200. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:13:50,266][00564] Avg episode reward: [(0, '22.978')] -[2024-09-22 12:13:50,682][02939] Updated weights for policy 0, policy_version 930 (0.0016) -[2024-09-22 12:13:55,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 3821568. Throughput: 0: 943.9. Samples: 955676. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:13:55,267][00564] Avg episode reward: [(0, '21.939')] -[2024-09-22 12:14:00,257][00564] Fps is (10 sec: 4096.4, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 3846144. Throughput: 0: 944.5. Samples: 959160. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:14:00,261][00564] Avg episode reward: [(0, '22.123')] -[2024-09-22 12:14:00,694][02939] Updated weights for policy 0, policy_version 940 (0.0034) -[2024-09-22 12:14:05,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3915.5). Total num frames: 3866624. Throughput: 0: 985.5. Samples: 965984. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:14:05,262][00564] Avg episode reward: [(0, '23.629')] -[2024-09-22 12:14:10,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 3878912. Throughput: 0: 952.4. Samples: 970362. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:14:10,263][00564] Avg episode reward: [(0, '23.958')] -[2024-09-22 12:14:12,330][02939] Updated weights for policy 0, policy_version 950 (0.0015) -[2024-09-22 12:14:15,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3915.5). Total num frames: 3903488. Throughput: 0: 940.8. Samples: 973278. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) -[2024-09-22 12:14:15,264][00564] Avg episode reward: [(0, '24.731')] -[2024-09-22 12:14:20,258][00564] Fps is (10 sec: 4505.2, 60 sec: 3959.4, 300 sec: 3915.5). Total num frames: 3923968. Throughput: 0: 959.8. Samples: 979910. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:14:20,264][00564] Avg episode reward: [(0, '26.122')] -[2024-09-22 12:14:20,266][02925] Saving new best policy, reward=26.122! -[2024-09-22 12:14:22,355][02939] Updated weights for policy 0, policy_version 960 (0.0029) -[2024-09-22 12:14:25,259][00564] Fps is (10 sec: 3276.1, 60 sec: 3754.5, 300 sec: 3901.6). Total num frames: 3936256. Throughput: 0: 956.2. Samples: 984792. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) -[2024-09-22 12:14:25,268][00564] Avg episode reward: [(0, '26.457')] -[2024-09-22 12:14:25,292][02925] Saving new best policy, reward=26.457! -[2024-09-22 12:14:30,257][00564] Fps is (10 sec: 3277.1, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 3956736. Throughput: 0: 934.2. Samples: 987156. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) -[2024-09-22 12:14:30,262][00564] Avg episode reward: [(0, '26.405')] -[2024-09-22 12:14:33,018][02939] Updated weights for policy 0, policy_version 970 (0.0030) -[2024-09-22 12:14:35,257][00564] Fps is (10 sec: 4506.6, 60 sec: 3891.7, 300 sec: 3901.6). Total num frames: 3981312. Throughput: 0: 953.2. Samples: 994094. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) -[2024-09-22 12:14:35,259][00564] Avg episode reward: [(0, '25.965')] -[2024-09-22 12:14:40,259][00564] Fps is (10 sec: 4095.2, 60 sec: 3822.8, 300 sec: 3915.5). Total num frames: 3997696. Throughput: 0: 979.1. Samples: 999736. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) -[2024-09-22 12:14:40,261][00564] Avg episode reward: [(0, '25.226')] -[2024-09-22 12:14:42,516][02925] Stopping Batcher_0... -[2024-09-22 12:14:42,516][02925] Loop batcher_evt_loop terminating... -[2024-09-22 12:14:42,516][00564] Component Batcher_0 stopped! -[2024-09-22 12:14:42,521][00564] Component RolloutWorker_w0 process died already! Don't wait for it. -[2024-09-22 12:14:42,524][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... -[2024-09-22 12:14:42,614][02939] Weights refcount: 2 0 -[2024-09-22 12:14:42,621][02939] Stopping InferenceWorker_p0-w0... -[2024-09-22 12:14:42,621][02939] Loop inference_proc0-0_evt_loop terminating... -[2024-09-22 12:14:42,621][00564] Component InferenceWorker_p0-w0 stopped! -[2024-09-22 12:14:42,680][02925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000753_3084288.pth -[2024-09-22 12:14:42,691][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... -[2024-09-22 12:14:43,000][02925] Stopping LearnerWorker_p0... -[2024-09-22 12:14:43,001][02925] Loop learner_proc0_evt_loop terminating... -[2024-09-22 12:14:43,001][00564] Component LearnerWorker_p0 stopped! -[2024-09-22 12:14:43,030][00564] Component RolloutWorker_w3 stopped! -[2024-09-22 12:14:43,035][02942] Stopping RolloutWorker_w3... -[2024-09-22 12:14:43,036][02942] Loop rollout_proc3_evt_loop terminating... -[2024-09-22 12:14:43,051][00564] Component RolloutWorker_w7 stopped! -[2024-09-22 12:14:43,057][02945] Stopping RolloutWorker_w7... -[2024-09-22 12:14:43,058][02945] Loop rollout_proc7_evt_loop terminating... -[2024-09-22 12:14:43,084][00564] Component RolloutWorker_w5 stopped! -[2024-09-22 12:14:43,090][02943] Stopping RolloutWorker_w5... -[2024-09-22 12:14:43,091][02943] Loop rollout_proc5_evt_loop terminating... -[2024-09-22 12:14:43,101][00564] Component RolloutWorker_w1 stopped! -[2024-09-22 12:14:43,109][02940] Stopping RolloutWorker_w1... -[2024-09-22 12:14:43,110][02940] Loop rollout_proc1_evt_loop terminating... -[2024-09-22 12:14:43,128][02944] Stopping RolloutWorker_w4... -[2024-09-22 12:14:43,130][02944] Loop rollout_proc4_evt_loop terminating... -[2024-09-22 12:14:43,128][00564] Component RolloutWorker_w4 stopped! -[2024-09-22 12:14:43,149][02946] Stopping RolloutWorker_w6... -[2024-09-22 12:14:43,148][00564] Component RolloutWorker_w6 stopped! -[2024-09-22 12:14:43,152][02946] Loop rollout_proc6_evt_loop terminating... -[2024-09-22 12:14:43,185][02941] Stopping RolloutWorker_w2... -[2024-09-22 12:14:43,184][00564] Component RolloutWorker_w2 stopped! -[2024-09-22 12:14:43,186][00564] Waiting for process learner_proc0 to stop... -[2024-09-22 12:14:43,189][02941] Loop rollout_proc2_evt_loop terminating... -[2024-09-22 12:14:44,440][00564] Waiting for process inference_proc0-0 to join... -[2024-09-22 12:14:44,443][00564] Waiting for process rollout_proc0 to join... -[2024-09-22 12:14:44,450][00564] Waiting for process rollout_proc1 to join... -[2024-09-22 12:14:46,279][00564] Waiting for process rollout_proc2 to join... -[2024-09-22 12:14:46,282][00564] Waiting for process rollout_proc3 to join... -[2024-09-22 12:14:46,288][00564] Waiting for process rollout_proc4 to join... -[2024-09-22 12:14:46,290][00564] Waiting for process rollout_proc5 to join... -[2024-09-22 12:14:46,293][00564] Waiting for process rollout_proc6 to join... -[2024-09-22 12:14:46,297][00564] Waiting for process rollout_proc7 to join... -[2024-09-22 12:14:46,299][00564] Batcher 0 profile tree view: -batching: 24.6516, releasing_batches: 0.0268 -[2024-09-22 12:14:46,301][00564] InferenceWorker_p0-w0 profile tree view: -wait_policy: 0.0001 - wait_policy_total: 408.5732 -update_model: 9.2120 - weight_update: 0.0017 -one_step: 0.0226 - handle_policy_step: 591.5292 - deserialize: 14.3376, stack: 3.2877, obs_to_device_normalize: 121.9759, forward: 319.5675, send_messages: 25.8998 - prepare_outputs: 77.5571 - to_cpu: 45.2189 -[2024-09-22 12:14:46,303][00564] Learner 0 profile tree view: -misc: 0.0052, prepare_batch: 13.5165 -train: 71.9820 - epoch_init: 0.0058, minibatch_init: 0.0111, losses_postprocess: 0.5760, kl_divergence: 0.5258, after_optimizer: 33.3866 - calculate_losses: 25.1242 - losses_init: 0.0294, forward_head: 1.2535, bptt_initial: 16.9256, tail: 1.0223, advantages_returns: 0.2459, losses: 3.6640 - bptt: 1.7097 - bptt_forward_core: 1.6222 - update: 11.7491 - clip: 0.8856 -[2024-09-22 12:14:46,304][00564] RolloutWorker_w7 profile tree view: -wait_for_trajectories: 0.3666, enqueue_policy_requests: 95.0201, env_step: 824.7435, overhead: 14.1138, complete_rollouts: 8.0628 -save_policy_outputs: 22.4268 - split_output_tensors: 8.8564 -[2024-09-22 12:14:46,306][00564] Loop Runner_EvtLoop terminating... -[2024-09-22 12:14:46,309][00564] Runner profile tree view: -main_loop: 1075.4156 -[2024-09-22 12:14:46,310][00564] Collected {0: 4005888}, FPS: 3725.0 -[2024-09-22 12:19:19,267][00564] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json -[2024-09-22 12:19:19,268][00564] Overriding arg 'num_workers' with value 1 passed from command line -[2024-09-22 12:19:19,271][00564] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-09-22 12:19:19,273][00564] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-09-22 12:19:19,275][00564] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-09-22 12:19:19,278][00564] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-09-22 12:19:19,279][00564] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! -[2024-09-22 12:19:19,281][00564] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-09-22 12:19:19,283][00564] Adding new argument 'push_to_hub'=False that is not in the saved config file! -[2024-09-22 12:19:19,284][00564] Adding new argument 'hf_repository'=None that is not in the saved config file! -[2024-09-22 12:19:19,285][00564] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-09-22 12:19:19,287][00564] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-09-22 12:19:19,288][00564] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-09-22 12:19:19,290][00564] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-09-22 12:19:19,291][00564] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-09-22 12:19:19,323][00564] Doom resolution: 160x120, resize resolution: (128, 72) -[2024-09-22 12:19:19,326][00564] RunningMeanStd input shape: (3, 72, 128) -[2024-09-22 12:19:19,329][00564] RunningMeanStd input shape: (1,) -[2024-09-22 12:19:19,346][00564] ConvEncoder: input_channels=3 -[2024-09-22 12:19:19,446][00564] Conv encoder output size: 512 -[2024-09-22 12:19:19,447][00564] Policy head output size: 512 -[2024-09-22 12:19:19,632][00564] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... -[2024-09-22 12:19:20,467][00564] Num frames 100... -[2024-09-22 12:19:20,590][00564] Num frames 200... -[2024-09-22 12:19:20,712][00564] Num frames 300... -[2024-09-22 12:19:20,876][00564] Avg episode rewards: #0: 6.890, true rewards: #0: 3.890 -[2024-09-22 12:19:20,878][00564] Avg episode reward: 6.890, avg true_objective: 3.890 -[2024-09-22 12:19:20,895][00564] Num frames 400... -[2024-09-22 12:19:21,018][00564] Num frames 500... -[2024-09-22 12:19:21,146][00564] Num frames 600... -[2024-09-22 12:19:21,273][00564] Num frames 700... -[2024-09-22 12:19:21,397][00564] Num frames 800... -[2024-09-22 12:19:21,521][00564] Num frames 900... -[2024-09-22 12:19:21,655][00564] Num frames 1000... -[2024-09-22 12:19:21,794][00564] Num frames 1100... -[2024-09-22 12:19:21,919][00564] Num frames 1200... -[2024-09-22 12:19:22,072][00564] Avg episode rewards: #0: 12.910, true rewards: #0: 6.410 -[2024-09-22 12:19:22,073][00564] Avg episode reward: 12.910, avg true_objective: 6.410 -[2024-09-22 12:19:22,104][00564] Num frames 1300... -[2024-09-22 12:19:22,223][00564] Num frames 1400... -[2024-09-22 12:19:22,349][00564] Num frames 1500... -[2024-09-22 12:19:22,470][00564] Num frames 1600... -[2024-09-22 12:19:22,591][00564] Num frames 1700... -[2024-09-22 12:19:22,716][00564] Num frames 1800... -[2024-09-22 12:19:22,809][00564] Avg episode rewards: #0: 11.753, true rewards: #0: 6.087 -[2024-09-22 12:19:22,810][00564] Avg episode reward: 11.753, avg true_objective: 6.087 -[2024-09-22 12:19:22,903][00564] Num frames 1900... -[2024-09-22 12:19:23,021][00564] Num frames 2000... -[2024-09-22 12:19:23,141][00564] Num frames 2100... -[2024-09-22 12:19:23,267][00564] Num frames 2200... -[2024-09-22 12:19:23,392][00564] Num frames 2300... -[2024-09-22 12:19:23,515][00564] Num frames 2400... -[2024-09-22 12:19:23,635][00564] Num frames 2500... -[2024-09-22 12:19:23,766][00564] Num frames 2600... -[2024-09-22 12:19:23,886][00564] Num frames 2700... -[2024-09-22 12:19:24,003][00564] Num frames 2800... -[2024-09-22 12:19:24,127][00564] Num frames 2900... -[2024-09-22 12:19:24,247][00564] Num frames 3000... -[2024-09-22 12:19:24,379][00564] Num frames 3100... -[2024-09-22 12:19:24,499][00564] Num frames 3200... -[2024-09-22 12:19:24,621][00564] Num frames 3300... -[2024-09-22 12:19:24,749][00564] Num frames 3400... -[2024-09-22 12:19:24,869][00564] Num frames 3500... -[2024-09-22 12:19:24,985][00564] Num frames 3600... -[2024-09-22 12:19:25,104][00564] Num frames 3700... -[2024-09-22 12:19:25,221][00564] Num frames 3800... -[2024-09-22 12:19:25,327][00564] Avg episode rewards: #0: 19.855, true rewards: #0: 9.605 -[2024-09-22 12:19:25,329][00564] Avg episode reward: 19.855, avg true_objective: 9.605 -[2024-09-22 12:19:25,400][00564] Num frames 3900... -[2024-09-22 12:19:25,522][00564] Num frames 4000... -[2024-09-22 12:19:25,643][00564] Num frames 4100... -[2024-09-22 12:19:25,769][00564] Num frames 4200... -[2024-09-22 12:19:25,887][00564] Num frames 4300... -[2024-09-22 12:19:26,008][00564] Num frames 4400... -[2024-09-22 12:19:26,126][00564] Num frames 4500... -[2024-09-22 12:19:26,247][00564] Num frames 4600... -[2024-09-22 12:19:26,376][00564] Num frames 4700... -[2024-09-22 12:19:26,523][00564] Avg episode rewards: #0: 20.140, true rewards: #0: 9.540 -[2024-09-22 12:19:26,524][00564] Avg episode reward: 20.140, avg true_objective: 9.540 -[2024-09-22 12:19:26,564][00564] Num frames 4800... -[2024-09-22 12:19:26,680][00564] Num frames 4900... -[2024-09-22 12:19:26,839][00564] Num frames 5000... -[2024-09-22 12:19:27,007][00564] Num frames 5100... -[2024-09-22 12:19:27,170][00564] Num frames 5200... -[2024-09-22 12:19:27,331][00564] Num frames 5300... -[2024-09-22 12:19:27,498][00564] Num frames 5400... -[2024-09-22 12:19:27,662][00564] Num frames 5500... -[2024-09-22 12:19:27,821][00564] Num frames 5600... -[2024-09-22 12:19:27,995][00564] Num frames 5700... -[2024-09-22 12:19:28,161][00564] Num frames 5800... -[2024-09-22 12:19:28,270][00564] Avg episode rewards: #0: 20.883, true rewards: #0: 9.717 -[2024-09-22 12:19:28,272][00564] Avg episode reward: 20.883, avg true_objective: 9.717 -[2024-09-22 12:19:28,390][00564] Num frames 5900... -[2024-09-22 12:19:28,574][00564] Num frames 6000... -[2024-09-22 12:19:28,754][00564] Num frames 6100... -[2024-09-22 12:19:28,932][00564] Num frames 6200... -[2024-09-22 12:19:29,107][00564] Num frames 6300... -[2024-09-22 12:19:29,249][00564] Num frames 6400... -[2024-09-22 12:19:29,373][00564] Num frames 6500... -[2024-09-22 12:19:29,510][00564] Num frames 6600... -[2024-09-22 12:19:29,633][00564] Num frames 6700... -[2024-09-22 12:19:29,765][00564] Num frames 6800... -[2024-09-22 12:19:29,890][00564] Num frames 6900... -[2024-09-22 12:19:30,014][00564] Num frames 7000... -[2024-09-22 12:19:30,135][00564] Num frames 7100... -[2024-09-22 12:19:30,256][00564] Num frames 7200... -[2024-09-22 12:19:30,382][00564] Num frames 7300... -[2024-09-22 12:19:30,511][00564] Num frames 7400... -[2024-09-22 12:19:30,636][00564] Num frames 7500... -[2024-09-22 12:19:30,765][00564] Num frames 7600... -[2024-09-22 12:19:30,886][00564] Num frames 7700... -[2024-09-22 12:19:31,004][00564] Avg episode rewards: #0: 25.357, true rewards: #0: 11.071 -[2024-09-22 12:19:31,005][00564] Avg episode reward: 25.357, avg true_objective: 11.071 -[2024-09-22 12:19:31,067][00564] Num frames 7800... -[2024-09-22 12:19:31,184][00564] Num frames 7900... -[2024-09-22 12:19:31,308][00564] Num frames 8000... -[2024-09-22 12:19:31,427][00564] Num frames 8100... -[2024-09-22 12:19:31,557][00564] Num frames 8200... -[2024-09-22 12:19:31,680][00564] Num frames 8300... -[2024-09-22 12:19:31,808][00564] Num frames 8400... -[2024-09-22 12:19:31,940][00564] Num frames 8500... -[2024-09-22 12:19:32,060][00564] Num frames 8600... -[2024-09-22 12:19:32,178][00564] Num frames 8700... -[2024-09-22 12:19:32,302][00564] Num frames 8800... -[2024-09-22 12:19:32,404][00564] Avg episode rewards: #0: 25.422, true rewards: #0: 11.047 -[2024-09-22 12:19:32,405][00564] Avg episode reward: 25.422, avg true_objective: 11.047 -[2024-09-22 12:19:32,485][00564] Num frames 8900... -[2024-09-22 12:19:32,613][00564] Num frames 9000... -[2024-09-22 12:19:32,742][00564] Num frames 9100... -[2024-09-22 12:19:32,867][00564] Num frames 9200... -[2024-09-22 12:19:32,986][00564] Num frames 9300... -[2024-09-22 12:19:33,106][00564] Num frames 9400... -[2024-09-22 12:19:33,229][00564] Num frames 9500... -[2024-09-22 12:19:33,349][00564] Num frames 9600... -[2024-09-22 12:19:33,468][00564] Num frames 9700... -[2024-09-22 12:19:33,598][00564] Num frames 9800... -[2024-09-22 12:19:33,728][00564] Num frames 9900... -[2024-09-22 12:19:33,851][00564] Num frames 10000... -[2024-09-22 12:19:33,978][00564] Num frames 10100... -[2024-09-22 12:19:34,099][00564] Num frames 10200... -[2024-09-22 12:19:34,220][00564] Num frames 10300... -[2024-09-22 12:19:34,343][00564] Num frames 10400... -[2024-09-22 12:19:34,463][00564] Num frames 10500... -[2024-09-22 12:19:34,588][00564] Avg episode rewards: #0: 27.280, true rewards: #0: 11.724 -[2024-09-22 12:19:34,590][00564] Avg episode reward: 27.280, avg true_objective: 11.724 -[2024-09-22 12:19:34,650][00564] Num frames 10600... -[2024-09-22 12:19:34,779][00564] Num frames 10700... -[2024-09-22 12:19:34,904][00564] Num frames 10800... -[2024-09-22 12:19:35,021][00564] Num frames 10900... -[2024-09-22 12:19:35,142][00564] Num frames 11000... -[2024-09-22 12:19:35,264][00564] Num frames 11100... -[2024-09-22 12:19:35,384][00564] Num frames 11200... -[2024-09-22 12:19:35,508][00564] Num frames 11300... -[2024-09-22 12:19:35,637][00564] Num frames 11400... -[2024-09-22 12:19:35,762][00564] Num frames 11500... -[2024-09-22 12:19:35,882][00564] Num frames 11600... -[2024-09-22 12:19:36,003][00564] Num frames 11700... -[2024-09-22 12:19:36,121][00564] Num frames 11800... -[2024-09-22 12:19:36,244][00564] Num frames 11900... -[2024-09-22 12:19:36,364][00564] Num frames 12000... -[2024-09-22 12:19:36,483][00564] Num frames 12100... -[2024-09-22 12:19:36,608][00564] Num frames 12200... -[2024-09-22 12:19:36,748][00564] Num frames 12300... -[2024-09-22 12:19:36,869][00564] Num frames 12400... -[2024-09-22 12:19:36,990][00564] Num frames 12500... -[2024-09-22 12:19:37,117][00564] Avg episode rewards: #0: 29.660, true rewards: #0: 12.560 -[2024-09-22 12:19:37,118][00564] Avg episode reward: 29.660, avg true_objective: 12.560 -[2024-09-22 12:20:51,928][00564] Replay video saved to /content/train_dir/default_experiment/replay.mp4! -[2024-09-22 12:25:52,984][00564] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json -[2024-09-22 12:25:52,986][00564] Overriding arg 'num_workers' with value 1 passed from command line -[2024-09-22 12:25:52,988][00564] Adding new argument 'no_render'=True that is not in the saved config file! -[2024-09-22 12:25:52,990][00564] Adding new argument 'save_video'=True that is not in the saved config file! -[2024-09-22 12:25:52,991][00564] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! -[2024-09-22 12:25:52,993][00564] Adding new argument 'video_name'=None that is not in the saved config file! -[2024-09-22 12:25:52,995][00564] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! -[2024-09-22 12:25:52,997][00564] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! -[2024-09-22 12:25:52,997][00564] Adding new argument 'push_to_hub'=True that is not in the saved config file! -[2024-09-22 12:25:52,998][00564] Adding new argument 'hf_repository'='kalmi901/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! -[2024-09-22 12:25:53,000][00564] Adding new argument 'policy_index'=0 that is not in the saved config file! -[2024-09-22 12:25:53,000][00564] Adding new argument 'eval_deterministic'=False that is not in the saved config file! -[2024-09-22 12:25:53,001][00564] Adding new argument 'train_script'=None that is not in the saved config file! -[2024-09-22 12:25:53,002][00564] Adding new argument 'enjoy_script'=None that is not in the saved config file! -[2024-09-22 12:25:53,003][00564] Using frameskip 1 and render_action_repeat=4 for evaluation -[2024-09-22 12:25:53,030][00564] RunningMeanStd input shape: (3, 72, 128) -[2024-09-22 12:25:53,032][00564] RunningMeanStd input shape: (1,) -[2024-09-22 12:25:53,044][00564] ConvEncoder: input_channels=3 -[2024-09-22 12:25:53,079][00564] Conv encoder output size: 512 -[2024-09-22 12:25:53,081][00564] Policy head output size: 512 -[2024-09-22 12:25:53,099][00564] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... -[2024-09-22 12:25:53,518][00564] Num frames 100... -[2024-09-22 12:25:53,648][00564] Num frames 200... -[2024-09-22 12:25:53,784][00564] Num frames 300... -[2024-09-22 12:25:53,911][00564] Num frames 400... -[2024-09-22 12:25:54,031][00564] Num frames 500... -[2024-09-22 12:25:54,150][00564] Num frames 600... -[2024-09-22 12:25:54,295][00564] Avg episode rewards: #0: 11.720, true rewards: #0: 6.720 -[2024-09-22 12:25:54,296][00564] Avg episode reward: 11.720, avg true_objective: 6.720 -[2024-09-22 12:25:54,335][00564] Num frames 700... -[2024-09-22 12:25:54,457][00564] Num frames 800... -[2024-09-22 12:25:54,591][00564] Num frames 900... -[2024-09-22 12:25:54,711][00564] Num frames 1000... -[2024-09-22 12:25:54,838][00564] Num frames 1100... -[2024-09-22 12:25:54,973][00564] Avg episode rewards: #0: 11.330, true rewards: #0: 5.830 -[2024-09-22 12:25:54,975][00564] Avg episode reward: 11.330, avg true_objective: 5.830 -[2024-09-22 12:25:55,018][00564] Num frames 1200... -[2024-09-22 12:25:55,156][00564] Num frames 1300... -[2024-09-22 12:25:55,330][00564] Num frames 1400... -[2024-09-22 12:25:55,498][00564] Num frames 1500... -[2024-09-22 12:25:55,664][00564] Num frames 1600... -[2024-09-22 12:25:55,832][00564] Num frames 1700... -[2024-09-22 12:25:55,998][00564] Num frames 1800... -[2024-09-22 12:25:56,159][00564] Num frames 1900... -[2024-09-22 12:25:56,318][00564] Num frames 2000... -[2024-09-22 12:25:56,486][00564] Num frames 2100... -[2024-09-22 12:25:56,667][00564] Num frames 2200... -[2024-09-22 12:25:56,865][00564] Num frames 2300... -[2024-09-22 12:25:57,045][00564] Num frames 2400... -[2024-09-22 12:25:57,219][00564] Num frames 2500... -[2024-09-22 12:25:57,391][00564] Num frames 2600... -[2024-09-22 12:25:57,566][00564] Num frames 2700... -[2024-09-22 12:25:57,753][00564] Num frames 2800... -[2024-09-22 12:25:57,818][00564] Avg episode rewards: #0: 21.687, true rewards: #0: 9.353 -[2024-09-22 12:25:57,820][00564] Avg episode reward: 21.687, avg true_objective: 9.353 -[2024-09-22 12:25:57,930][00564] Num frames 2900... -[2024-09-22 12:25:58,046][00564] Num frames 3000... -[2024-09-22 12:25:58,168][00564] Num frames 3100... -[2024-09-22 12:25:58,286][00564] Num frames 3200... -[2024-09-22 12:25:58,408][00564] Num frames 3300... -[2024-09-22 12:25:58,527][00564] Num frames 3400... -[2024-09-22 12:25:58,654][00564] Num frames 3500... -[2024-09-22 12:25:58,808][00564] Avg episode rewards: #0: 21.185, true rewards: #0: 8.935 -[2024-09-22 12:25:58,809][00564] Avg episode reward: 21.185, avg true_objective: 8.935 -[2024-09-22 12:25:58,845][00564] Num frames 3600... -[2024-09-22 12:25:58,962][00564] Num frames 3700... -[2024-09-22 12:25:59,081][00564] Num frames 3800... -[2024-09-22 12:25:59,201][00564] Num frames 3900... -[2024-09-22 12:25:59,321][00564] Num frames 4000... -[2024-09-22 12:25:59,442][00564] Num frames 4100... -[2024-09-22 12:25:59,563][00564] Num frames 4200... -[2024-09-22 12:25:59,692][00564] Num frames 4300... -[2024-09-22 12:25:59,822][00564] Num frames 4400... -[2024-09-22 12:25:59,942][00564] Num frames 4500... -[2024-09-22 12:26:00,059][00564] Num frames 4600... -[2024-09-22 12:26:00,235][00564] Avg episode rewards: #0: 21.794, true rewards: #0: 9.394 -[2024-09-22 12:26:00,237][00564] Avg episode reward: 21.794, avg true_objective: 9.394 -[2024-09-22 12:26:00,243][00564] Num frames 4700... -[2024-09-22 12:26:00,371][00564] Num frames 4800... -[2024-09-22 12:26:00,491][00564] Num frames 4900... -[2024-09-22 12:26:00,608][00564] Num frames 5000... -[2024-09-22 12:26:00,743][00564] Num frames 5100... -[2024-09-22 12:26:00,861][00564] Num frames 5200... -[2024-09-22 12:26:00,979][00564] Num frames 5300... -[2024-09-22 12:26:01,098][00564] Num frames 5400... -[2024-09-22 12:26:01,217][00564] Num frames 5500... -[2024-09-22 12:26:01,338][00564] Num frames 5600... -[2024-09-22 12:26:01,461][00564] Num frames 5700... -[2024-09-22 12:26:01,580][00564] Num frames 5800... -[2024-09-22 12:26:01,714][00564] Num frames 5900... -[2024-09-22 12:26:01,838][00564] Num frames 6000... -[2024-09-22 12:26:01,961][00564] Num frames 6100... -[2024-09-22 12:26:02,083][00564] Num frames 6200... -[2024-09-22 12:26:02,203][00564] Num frames 6300... -[2024-09-22 12:26:02,328][00564] Num frames 6400... -[2024-09-22 12:26:02,448][00564] Num frames 6500... -[2024-09-22 12:26:02,568][00564] Num frames 6600... -[2024-09-22 12:26:02,692][00564] Num frames 6700... -[2024-09-22 12:26:02,880][00564] Avg episode rewards: #0: 27.328, true rewards: #0: 11.328 -[2024-09-22 12:26:02,881][00564] Avg episode reward: 27.328, avg true_objective: 11.328 -[2024-09-22 12:26:02,889][00564] Num frames 6800... -[2024-09-22 12:26:03,009][00564] Num frames 6900... -[2024-09-22 12:26:03,126][00564] Num frames 7000... -[2024-09-22 12:26:03,247][00564] Num frames 7100... -[2024-09-22 12:26:03,367][00564] Num frames 7200... -[2024-09-22 12:26:03,498][00564] Num frames 7300... -[2024-09-22 12:26:03,620][00564] Num frames 7400... -[2024-09-22 12:26:03,746][00564] Num frames 7500... -[2024-09-22 12:26:03,876][00564] Num frames 7600... -[2024-09-22 12:26:03,998][00564] Num frames 7700... -[2024-09-22 12:26:04,115][00564] Num frames 7800... -[2024-09-22 12:26:04,236][00564] Num frames 7900... -[2024-09-22 12:26:04,355][00564] Num frames 8000... -[2024-09-22 12:26:04,466][00564] Avg episode rewards: #0: 27.493, true rewards: #0: 11.493 -[2024-09-22 12:26:04,468][00564] Avg episode reward: 27.493, avg true_objective: 11.493 -[2024-09-22 12:26:04,533][00564] Num frames 8100... -[2024-09-22 12:26:04,651][00564] Num frames 8200... -[2024-09-22 12:26:04,780][00564] Num frames 8300... -[2024-09-22 12:26:04,905][00564] Num frames 8400... -[2024-09-22 12:26:05,024][00564] Num frames 8500... -[2024-09-22 12:26:05,145][00564] Num frames 8600... -[2024-09-22 12:26:05,265][00564] Num frames 8700... -[2024-09-22 12:26:05,383][00564] Num frames 8800... -[2024-09-22 12:26:05,506][00564] Num frames 8900... -[2024-09-22 12:26:05,627][00564] Num frames 9000... -[2024-09-22 12:26:05,759][00564] Num frames 9100... -[2024-09-22 12:26:05,889][00564] Num frames 9200... -[2024-09-22 12:26:06,054][00564] Avg episode rewards: #0: 27.366, true rewards: #0: 11.616 -[2024-09-22 12:26:06,056][00564] Avg episode reward: 27.366, avg true_objective: 11.616 -[2024-09-22 12:26:06,067][00564] Num frames 9300... -[2024-09-22 12:26:06,188][00564] Num frames 9400... -[2024-09-22 12:26:06,305][00564] Num frames 9500... -[2024-09-22 12:26:06,425][00564] Num frames 9600... -[2024-09-22 12:26:06,545][00564] Num frames 9700... -[2024-09-22 12:26:06,662][00564] Num frames 9800... -[2024-09-22 12:26:06,790][00564] Num frames 9900... -[2024-09-22 12:26:06,849][00564] Avg episode rewards: #0: 25.446, true rewards: #0: 11.001 -[2024-09-22 12:26:06,850][00564] Avg episode reward: 25.446, avg true_objective: 11.001 -[2024-09-22 12:26:06,965][00564] Num frames 10000... -[2024-09-22 12:26:07,096][00564] Num frames 10100... -[2024-09-22 12:26:07,212][00564] Num frames 10200... -[2024-09-22 12:26:07,330][00564] Num frames 10300... -[2024-09-22 12:26:07,446][00564] Num frames 10400... -[2024-09-22 12:26:07,563][00564] Num frames 10500... -[2024-09-22 12:26:07,684][00564] Num frames 10600... -[2024-09-22 12:26:07,747][00564] Avg episode rewards: #0: 24.005, true rewards: #0: 10.605 -[2024-09-22 12:26:07,749][00564] Avg episode reward: 24.005, avg true_objective: 10.605 -[2024-09-22 12:27:09,362][00564] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2024-09-22 15:26:53,257][02352] Using optimizer +[2024-09-22 15:26:53,954][02352] No checkpoints found +[2024-09-22 15:26:53,954][02352] Did not load from checkpoint, starting from scratch! +[2024-09-22 15:26:53,954][02352] Initialized policy 0 weights for model version 0 +[2024-09-22 15:26:53,958][02352] LearnerWorker_p0 finished initialization! +[2024-09-22 15:26:53,959][02352] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-09-22 15:26:54,056][02365] RunningMeanStd input shape: (3, 72, 128) +[2024-09-22 15:26:54,058][02365] RunningMeanStd input shape: (1,) +[2024-09-22 15:26:54,069][02365] ConvEncoder: input_channels=3 +[2024-09-22 15:26:54,171][02365] Conv encoder output size: 512 +[2024-09-22 15:26:54,171][02365] Policy head output size: 512 +[2024-09-22 15:26:54,222][00338] Inference worker 0-0 is ready! +[2024-09-22 15:26:54,223][00338] All inference workers are ready! Signal rollout workers to start! +[2024-09-22 15:26:54,427][02367] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 15:26:54,426][02369] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 15:26:54,431][02366] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 15:26:54,429][02370] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 15:26:54,425][02373] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 15:26:54,432][02371] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 15:26:54,434][02372] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 15:26:54,433][02368] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 15:26:55,567][00338] Heartbeat connected on Batcher_0 +[2024-09-22 15:26:55,574][00338] Heartbeat connected on LearnerWorker_p0 +[2024-09-22 15:26:55,626][00338] Heartbeat connected on InferenceWorker_p0-w0 +[2024-09-22 15:26:55,888][02369] Decorrelating experience for 0 frames... +[2024-09-22 15:26:55,892][02367] Decorrelating experience for 0 frames... +[2024-09-22 15:26:55,902][02370] Decorrelating experience for 0 frames... +[2024-09-22 15:26:56,161][02372] Decorrelating experience for 0 frames... +[2024-09-22 15:26:56,169][02368] Decorrelating experience for 0 frames... +[2024-09-22 15:26:56,167][02366] Decorrelating experience for 0 frames... +[2024-09-22 15:26:56,171][02371] Decorrelating experience for 0 frames... +[2024-09-22 15:26:57,620][02371] Decorrelating experience for 32 frames... +[2024-09-22 15:26:57,625][02366] Decorrelating experience for 32 frames... +[2024-09-22 15:26:57,629][02372] Decorrelating experience for 32 frames... +[2024-09-22 15:26:57,908][02370] Decorrelating experience for 32 frames... +[2024-09-22 15:26:57,926][02367] Decorrelating experience for 32 frames... +[2024-09-22 15:26:58,334][02369] Decorrelating experience for 32 frames... +[2024-09-22 15:26:58,839][00338] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-22 15:26:59,809][02368] Decorrelating experience for 32 frames... +[2024-09-22 15:26:59,870][02373] Decorrelating experience for 0 frames... +[2024-09-22 15:27:00,049][02371] Decorrelating experience for 64 frames... +[2024-09-22 15:27:00,055][02372] Decorrelating experience for 64 frames... +[2024-09-22 15:27:00,078][02366] Decorrelating experience for 64 frames... +[2024-09-22 15:27:00,667][02369] Decorrelating experience for 64 frames... +[2024-09-22 15:27:01,506][02367] Decorrelating experience for 64 frames... +[2024-09-22 15:27:01,545][02372] Decorrelating experience for 96 frames... +[2024-09-22 15:27:01,547][02371] Decorrelating experience for 96 frames... +[2024-09-22 15:27:01,795][02373] Decorrelating experience for 32 frames... +[2024-09-22 15:27:01,863][00338] Heartbeat connected on RolloutWorker_w4 +[2024-09-22 15:27:01,868][00338] Heartbeat connected on RolloutWorker_w6 +[2024-09-22 15:27:02,542][02369] Decorrelating experience for 96 frames... +[2024-09-22 15:27:02,589][02368] Decorrelating experience for 64 frames... +[2024-09-22 15:27:02,778][00338] Heartbeat connected on RolloutWorker_w3 +[2024-09-22 15:27:03,194][02370] Decorrelating experience for 64 frames... +[2024-09-22 15:27:03,378][02367] Decorrelating experience for 96 frames... +[2024-09-22 15:27:03,635][00338] Heartbeat connected on RolloutWorker_w1 +[2024-09-22 15:27:03,757][02366] Decorrelating experience for 96 frames... +[2024-09-22 15:27:03,841][00338] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-22 15:27:03,860][02368] Decorrelating experience for 96 frames... +[2024-09-22 15:27:03,961][02373] Decorrelating experience for 64 frames... +[2024-09-22 15:27:04,057][00338] Heartbeat connected on RolloutWorker_w0 +[2024-09-22 15:27:04,284][00338] Heartbeat connected on RolloutWorker_w2 +[2024-09-22 15:27:04,450][02370] Decorrelating experience for 96 frames... +[2024-09-22 15:27:04,759][00338] Heartbeat connected on RolloutWorker_w5 +[2024-09-22 15:27:05,630][02373] Decorrelating experience for 96 frames... +[2024-09-22 15:27:05,863][00338] Heartbeat connected on RolloutWorker_w7 +[2024-09-22 15:27:07,287][02352] Signal inference workers to stop experience collection... +[2024-09-22 15:27:07,299][02365] InferenceWorker_p0-w0: stopping experience collection +[2024-09-22 15:27:08,839][00338] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 170.6. Samples: 1706. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-22 15:27:08,844][00338] Avg episode reward: [(0, '2.022')] +[2024-09-22 15:27:10,629][02352] Signal inference workers to resume experience collection... +[2024-09-22 15:27:10,630][02365] InferenceWorker_p0-w0: resuming experience collection +[2024-09-22 15:27:13,841][00338] Fps is (10 sec: 1638.5, 60 sec: 1092.2, 300 sec: 1092.2). Total num frames: 16384. Throughput: 0: 254.9. Samples: 3824. Policy #0 lag: (min: 0.0, avg: 0.6, max: 3.0) +[2024-09-22 15:27:13,843][00338] Avg episode reward: [(0, '3.218')] +[2024-09-22 15:27:18,842][00338] Fps is (10 sec: 2866.6, 60 sec: 1433.4, 300 sec: 1433.4). Total num frames: 28672. Throughput: 0: 378.6. Samples: 7572. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-09-22 15:27:18,850][00338] Avg episode reward: [(0, '3.953')] +[2024-09-22 15:27:21,698][02365] Updated weights for policy 0, policy_version 10 (0.0027) +[2024-09-22 15:27:23,839][00338] Fps is (10 sec: 3277.3, 60 sec: 1966.1, 300 sec: 1966.1). Total num frames: 49152. Throughput: 0: 404.2. Samples: 10106. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-09-22 15:27:23,848][00338] Avg episode reward: [(0, '4.469')] +[2024-09-22 15:27:28,839][00338] Fps is (10 sec: 4096.9, 60 sec: 2321.1, 300 sec: 2321.1). Total num frames: 69632. Throughput: 0: 565.0. Samples: 16950. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:27:28,846][00338] Avg episode reward: [(0, '4.260')] +[2024-09-22 15:27:31,243][02365] Updated weights for policy 0, policy_version 20 (0.0035) +[2024-09-22 15:27:33,839][00338] Fps is (10 sec: 3686.4, 60 sec: 2457.6, 300 sec: 2457.6). Total num frames: 86016. Throughput: 0: 637.4. Samples: 22308. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:27:33,844][00338] Avg episode reward: [(0, '4.321')] +[2024-09-22 15:27:38,839][00338] Fps is (10 sec: 3276.8, 60 sec: 2560.0, 300 sec: 2560.0). Total num frames: 102400. Throughput: 0: 608.7. Samples: 24348. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:27:38,845][00338] Avg episode reward: [(0, '4.430')] +[2024-09-22 15:27:38,853][02352] Saving new best policy, reward=4.430! +[2024-09-22 15:27:43,386][02365] Updated weights for policy 0, policy_version 30 (0.0020) +[2024-09-22 15:27:43,839][00338] Fps is (10 sec: 3686.4, 60 sec: 2730.7, 300 sec: 2730.7). Total num frames: 122880. Throughput: 0: 665.6. Samples: 29954. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:27:43,847][00338] Avg episode reward: [(0, '4.459')] +[2024-09-22 15:27:43,849][02352] Saving new best policy, reward=4.459! +[2024-09-22 15:27:48,845][00338] Fps is (10 sec: 4093.8, 60 sec: 2866.9, 300 sec: 2866.9). Total num frames: 143360. Throughput: 0: 802.4. Samples: 36112. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:27:48,849][00338] Avg episode reward: [(0, '4.504')] +[2024-09-22 15:27:48,859][02352] Saving new best policy, reward=4.504! +[2024-09-22 15:27:53,847][00338] Fps is (10 sec: 3274.3, 60 sec: 2829.6, 300 sec: 2829.6). Total num frames: 155648. Throughput: 0: 808.7. Samples: 38102. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:27:53,849][00338] Avg episode reward: [(0, '4.535')] +[2024-09-22 15:27:53,854][02352] Saving new best policy, reward=4.535! +[2024-09-22 15:27:55,649][02365] Updated weights for policy 0, policy_version 40 (0.0019) +[2024-09-22 15:27:58,839][00338] Fps is (10 sec: 3278.6, 60 sec: 2935.5, 300 sec: 2935.5). Total num frames: 176128. Throughput: 0: 879.5. Samples: 43402. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:27:58,844][00338] Avg episode reward: [(0, '4.471')] +[2024-09-22 15:28:03,839][00338] Fps is (10 sec: 4509.0, 60 sec: 3345.2, 300 sec: 3087.8). Total num frames: 200704. Throughput: 0: 946.9. Samples: 50182. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:28:03,844][00338] Avg episode reward: [(0, '4.358')] +[2024-09-22 15:28:05,066][02365] Updated weights for policy 0, policy_version 50 (0.0020) +[2024-09-22 15:28:08,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3042.7). Total num frames: 212992. Throughput: 0: 945.0. Samples: 52630. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:28:08,845][00338] Avg episode reward: [(0, '4.332')] +[2024-09-22 15:28:13,839][00338] Fps is (10 sec: 2867.2, 60 sec: 3550.0, 300 sec: 3058.3). Total num frames: 229376. Throughput: 0: 886.8. Samples: 56854. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:28:13,842][00338] Avg episode reward: [(0, '4.330')] +[2024-09-22 15:28:17,173][02365] Updated weights for policy 0, policy_version 60 (0.0032) +[2024-09-22 15:28:18,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3686.5, 300 sec: 3123.2). Total num frames: 249856. Throughput: 0: 908.9. Samples: 63210. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:28:18,847][00338] Avg episode reward: [(0, '4.348')] +[2024-09-22 15:28:23,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3180.4). Total num frames: 270336. Throughput: 0: 936.1. Samples: 66474. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:28:23,843][00338] Avg episode reward: [(0, '4.526')] +[2024-09-22 15:28:28,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3140.3). Total num frames: 282624. Throughput: 0: 904.6. Samples: 70662. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:28:28,844][00338] Avg episode reward: [(0, '4.573')] +[2024-09-22 15:28:28,863][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000069_282624.pth... +[2024-09-22 15:28:29,009][02352] Saving new best policy, reward=4.573! +[2024-09-22 15:28:29,342][02365] Updated weights for policy 0, policy_version 70 (0.0042) +[2024-09-22 15:28:33,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3233.7). Total num frames: 307200. Throughput: 0: 900.7. Samples: 76638. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:28:33,842][00338] Avg episode reward: [(0, '4.466')] +[2024-09-22 15:28:38,297][02365] Updated weights for policy 0, policy_version 80 (0.0021) +[2024-09-22 15:28:38,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3276.8). Total num frames: 327680. Throughput: 0: 930.8. Samples: 79980. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-09-22 15:28:38,842][00338] Avg episode reward: [(0, '4.498')] +[2024-09-22 15:28:43,844][00338] Fps is (10 sec: 3275.4, 60 sec: 3617.9, 300 sec: 3237.7). Total num frames: 339968. Throughput: 0: 929.0. Samples: 85210. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:28:43,846][00338] Avg episode reward: [(0, '4.571')] +[2024-09-22 15:28:48,840][00338] Fps is (10 sec: 3276.7, 60 sec: 3618.4, 300 sec: 3276.8). Total num frames: 360448. Throughput: 0: 892.9. Samples: 90364. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:28:48,842][00338] Avg episode reward: [(0, '4.528')] +[2024-09-22 15:28:50,373][02365] Updated weights for policy 0, policy_version 90 (0.0043) +[2024-09-22 15:28:53,839][00338] Fps is (10 sec: 4097.8, 60 sec: 3755.1, 300 sec: 3312.4). Total num frames: 380928. Throughput: 0: 913.7. Samples: 93748. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:28:53,846][00338] Avg episode reward: [(0, '4.674')] +[2024-09-22 15:28:53,848][02352] Saving new best policy, reward=4.674! +[2024-09-22 15:28:58,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3345.1). Total num frames: 401408. Throughput: 0: 952.0. Samples: 99692. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:28:58,842][00338] Avg episode reward: [(0, '4.670')] +[2024-09-22 15:29:01,797][02365] Updated weights for policy 0, policy_version 100 (0.0041) +[2024-09-22 15:29:03,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3309.6). Total num frames: 413696. Throughput: 0: 907.0. Samples: 104024. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-09-22 15:29:03,842][00338] Avg episode reward: [(0, '4.638')] +[2024-09-22 15:29:08,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3371.3). Total num frames: 438272. Throughput: 0: 907.1. Samples: 107292. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:29:08,845][00338] Avg episode reward: [(0, '4.721')] +[2024-09-22 15:29:08,855][02352] Saving new best policy, reward=4.721! +[2024-09-22 15:29:11,504][02365] Updated weights for policy 0, policy_version 110 (0.0024) +[2024-09-22 15:29:13,839][00338] Fps is (10 sec: 4095.9, 60 sec: 3754.7, 300 sec: 3367.8). Total num frames: 454656. Throughput: 0: 962.0. Samples: 113950. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:29:13,842][00338] Avg episode reward: [(0, '4.593')] +[2024-09-22 15:29:18,841][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3364.6). Total num frames: 471040. Throughput: 0: 921.3. Samples: 118096. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:29:18,844][00338] Avg episode reward: [(0, '4.543')] +[2024-09-22 15:29:23,642][02365] Updated weights for policy 0, policy_version 120 (0.0039) +[2024-09-22 15:29:23,839][00338] Fps is (10 sec: 3686.5, 60 sec: 3686.4, 300 sec: 3389.8). Total num frames: 491520. Throughput: 0: 906.2. Samples: 120760. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:29:23,844][00338] Avg episode reward: [(0, '4.507')] +[2024-09-22 15:29:28,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3440.6). Total num frames: 516096. Throughput: 0: 946.3. Samples: 127790. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:29:28,847][00338] Avg episode reward: [(0, '4.756')] +[2024-09-22 15:29:28,854][02352] Saving new best policy, reward=4.756! +[2024-09-22 15:29:33,842][00338] Fps is (10 sec: 3685.2, 60 sec: 3686.2, 300 sec: 3408.9). Total num frames: 528384. Throughput: 0: 949.5. Samples: 133096. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:29:33,848][00338] Avg episode reward: [(0, '4.737')] +[2024-09-22 15:29:34,058][02365] Updated weights for policy 0, policy_version 130 (0.0043) +[2024-09-22 15:29:38,840][00338] Fps is (10 sec: 3276.7, 60 sec: 3686.4, 300 sec: 3430.4). Total num frames: 548864. Throughput: 0: 921.0. Samples: 135194. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:29:38,845][00338] Avg episode reward: [(0, '4.793')] +[2024-09-22 15:29:38,855][02352] Saving new best policy, reward=4.793! +[2024-09-22 15:29:43,839][00338] Fps is (10 sec: 4097.3, 60 sec: 3823.2, 300 sec: 3450.6). Total num frames: 569344. Throughput: 0: 937.6. Samples: 141886. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:29:43,846][00338] Avg episode reward: [(0, '4.871')] +[2024-09-22 15:29:43,848][02352] Saving new best policy, reward=4.871! +[2024-09-22 15:29:44,104][02365] Updated weights for policy 0, policy_version 140 (0.0031) +[2024-09-22 15:29:48,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3469.5). Total num frames: 589824. Throughput: 0: 976.1. Samples: 147950. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:29:48,841][00338] Avg episode reward: [(0, '4.581')] +[2024-09-22 15:29:53,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3440.6). Total num frames: 602112. Throughput: 0: 947.5. Samples: 149930. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:29:53,845][00338] Avg episode reward: [(0, '4.601')] +[2024-09-22 15:29:56,038][02365] Updated weights for policy 0, policy_version 150 (0.0024) +[2024-09-22 15:29:58,840][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3481.6). Total num frames: 626688. Throughput: 0: 927.3. Samples: 155680. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:29:58,847][00338] Avg episode reward: [(0, '4.761')] +[2024-09-22 15:30:03,840][00338] Fps is (10 sec: 4505.4, 60 sec: 3891.2, 300 sec: 3498.2). Total num frames: 647168. Throughput: 0: 987.8. Samples: 162546. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:30:03,847][00338] Avg episode reward: [(0, '4.622')] +[2024-09-22 15:30:05,394][02365] Updated weights for policy 0, policy_version 160 (0.0023) +[2024-09-22 15:30:08,842][00338] Fps is (10 sec: 3685.4, 60 sec: 3754.5, 300 sec: 3492.3). Total num frames: 663552. Throughput: 0: 983.6. Samples: 165026. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:30:08,845][00338] Avg episode reward: [(0, '4.622')] +[2024-09-22 15:30:13,839][00338] Fps is (10 sec: 3686.5, 60 sec: 3822.9, 300 sec: 3507.9). Total num frames: 684032. Throughput: 0: 933.8. Samples: 169810. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:30:13,846][00338] Avg episode reward: [(0, '4.563')] +[2024-09-22 15:30:16,482][02365] Updated weights for policy 0, policy_version 170 (0.0015) +[2024-09-22 15:30:18,839][00338] Fps is (10 sec: 4097.2, 60 sec: 3891.2, 300 sec: 3522.6). Total num frames: 704512. Throughput: 0: 969.0. Samples: 176698. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:30:18,846][00338] Avg episode reward: [(0, '4.883')] +[2024-09-22 15:30:18,861][02352] Saving new best policy, reward=4.883! +[2024-09-22 15:30:23,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3516.6). Total num frames: 720896. Throughput: 0: 994.9. Samples: 179962. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:30:23,845][00338] Avg episode reward: [(0, '5.263')] +[2024-09-22 15:30:23,851][02352] Saving new best policy, reward=5.263! +[2024-09-22 15:30:28,190][02365] Updated weights for policy 0, policy_version 180 (0.0025) +[2024-09-22 15:30:28,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3510.9). Total num frames: 737280. Throughput: 0: 934.7. Samples: 183948. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:30:28,842][00338] Avg episode reward: [(0, '5.269')] +[2024-09-22 15:30:28,854][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000180_737280.pth... +[2024-09-22 15:30:28,974][02352] Saving new best policy, reward=5.269! +[2024-09-22 15:30:33,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.4, 300 sec: 3543.5). Total num frames: 761856. Throughput: 0: 944.9. Samples: 190472. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:30:33,842][00338] Avg episode reward: [(0, '5.052')] +[2024-09-22 15:30:37,155][02365] Updated weights for policy 0, policy_version 190 (0.0032) +[2024-09-22 15:30:38,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3556.1). Total num frames: 782336. Throughput: 0: 978.0. Samples: 193942. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:30:38,851][00338] Avg episode reward: [(0, '5.141')] +[2024-09-22 15:30:43,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3531.7). Total num frames: 794624. Throughput: 0: 956.9. Samples: 198738. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:30:43,847][00338] Avg episode reward: [(0, '5.166')] +[2024-09-22 15:30:48,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3543.9). Total num frames: 815104. Throughput: 0: 932.5. Samples: 204506. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:30:48,841][00338] Avg episode reward: [(0, '5.383')] +[2024-09-22 15:30:48,854][02352] Saving new best policy, reward=5.383! +[2024-09-22 15:30:49,154][02365] Updated weights for policy 0, policy_version 200 (0.0025) +[2024-09-22 15:30:53,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3573.1). Total num frames: 839680. Throughput: 0: 949.0. Samples: 207726. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:30:53,843][00338] Avg episode reward: [(0, '5.660')] +[2024-09-22 15:30:53,847][02352] Saving new best policy, reward=5.660! +[2024-09-22 15:30:58,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3549.9). Total num frames: 851968. Throughput: 0: 969.7. Samples: 213446. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:30:58,843][00338] Avg episode reward: [(0, '5.635')] +[2024-09-22 15:31:00,644][02365] Updated weights for policy 0, policy_version 210 (0.0042) +[2024-09-22 15:31:03,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3561.0). Total num frames: 872448. Throughput: 0: 923.2. Samples: 218242. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:31:03,842][00338] Avg episode reward: [(0, '5.562')] +[2024-09-22 15:31:08,840][00338] Fps is (10 sec: 4095.7, 60 sec: 3823.1, 300 sec: 3571.7). Total num frames: 892928. Throughput: 0: 928.9. Samples: 221762. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:31:08,844][00338] Avg episode reward: [(0, '5.271')] +[2024-09-22 15:31:10,003][02365] Updated weights for policy 0, policy_version 220 (0.0027) +[2024-09-22 15:31:13,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3582.0). Total num frames: 913408. Throughput: 0: 985.6. Samples: 228298. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:31:13,843][00338] Avg episode reward: [(0, '5.234')] +[2024-09-22 15:31:18,841][00338] Fps is (10 sec: 3276.6, 60 sec: 3686.3, 300 sec: 3560.4). Total num frames: 925696. Throughput: 0: 933.1. Samples: 232464. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:31:18,845][00338] Avg episode reward: [(0, '5.586')] +[2024-09-22 15:31:21,668][02365] Updated weights for policy 0, policy_version 230 (0.0013) +[2024-09-22 15:31:23,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3585.9). Total num frames: 950272. Throughput: 0: 925.8. Samples: 235604. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:31:23,847][00338] Avg episode reward: [(0, '5.597')] +[2024-09-22 15:31:28,839][00338] Fps is (10 sec: 4915.8, 60 sec: 3959.5, 300 sec: 3610.5). Total num frames: 974848. Throughput: 0: 970.6. Samples: 242414. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:31:28,842][00338] Avg episode reward: [(0, '5.848')] +[2024-09-22 15:31:28,853][02352] Saving new best policy, reward=5.848! +[2024-09-22 15:31:31,812][02365] Updated weights for policy 0, policy_version 240 (0.0033) +[2024-09-22 15:31:33,842][00338] Fps is (10 sec: 3685.3, 60 sec: 3754.5, 300 sec: 3589.5). Total num frames: 987136. Throughput: 0: 950.2. Samples: 247268. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:31:33,848][00338] Avg episode reward: [(0, '5.782')] +[2024-09-22 15:31:38,839][00338] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3584.0). Total num frames: 1003520. Throughput: 0: 925.8. Samples: 249388. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:31:38,841][00338] Avg episode reward: [(0, '5.718')] +[2024-09-22 15:31:42,606][02365] Updated weights for policy 0, policy_version 250 (0.0021) +[2024-09-22 15:31:43,839][00338] Fps is (10 sec: 4097.2, 60 sec: 3891.2, 300 sec: 3607.4). Total num frames: 1028096. Throughput: 0: 949.8. Samples: 256188. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:31:43,842][00338] Avg episode reward: [(0, '6.157')] +[2024-09-22 15:31:43,846][02352] Saving new best policy, reward=6.157! +[2024-09-22 15:31:48,840][00338] Fps is (10 sec: 4095.6, 60 sec: 3822.9, 300 sec: 3601.6). Total num frames: 1044480. Throughput: 0: 967.5. Samples: 261780. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:31:48,845][00338] Avg episode reward: [(0, '6.049')] +[2024-09-22 15:31:53,840][00338] Fps is (10 sec: 3276.7, 60 sec: 3686.4, 300 sec: 3596.1). Total num frames: 1060864. Throughput: 0: 933.1. Samples: 263752. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:31:53,847][00338] Avg episode reward: [(0, '6.390')] +[2024-09-22 15:31:53,849][02352] Saving new best policy, reward=6.390! +[2024-09-22 15:31:54,609][02365] Updated weights for policy 0, policy_version 260 (0.0035) +[2024-09-22 15:31:58,839][00338] Fps is (10 sec: 3686.8, 60 sec: 3822.9, 300 sec: 3665.6). Total num frames: 1081344. Throughput: 0: 921.7. Samples: 269774. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:31:58,842][00338] Avg episode reward: [(0, '6.089')] +[2024-09-22 15:32:03,839][00338] Fps is (10 sec: 4096.1, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 1101824. Throughput: 0: 974.4. Samples: 276312. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:32:03,843][00338] Avg episode reward: [(0, '6.191')] +[2024-09-22 15:32:04,167][02365] Updated weights for policy 0, policy_version 270 (0.0018) +[2024-09-22 15:32:08,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 1118208. Throughput: 0: 949.4. Samples: 278328. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:32:08,843][00338] Avg episode reward: [(0, '6.406')] +[2024-09-22 15:32:08,856][02352] Saving new best policy, reward=6.406! +[2024-09-22 15:32:13,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 1138688. Throughput: 0: 912.9. Samples: 283496. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:32:13,842][00338] Avg episode reward: [(0, '6.470')] +[2024-09-22 15:32:13,845][02352] Saving new best policy, reward=6.470! +[2024-09-22 15:32:15,595][02365] Updated weights for policy 0, policy_version 280 (0.0031) +[2024-09-22 15:32:18,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.3, 300 sec: 3762.8). Total num frames: 1159168. Throughput: 0: 957.0. Samples: 290332. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:32:18,845][00338] Avg episode reward: [(0, '7.265')] +[2024-09-22 15:32:18,855][02352] Saving new best policy, reward=7.265! +[2024-09-22 15:32:23,843][00338] Fps is (10 sec: 3684.9, 60 sec: 3754.4, 300 sec: 3748.8). Total num frames: 1175552. Throughput: 0: 970.5. Samples: 293066. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:32:23,847][00338] Avg episode reward: [(0, '7.586')] +[2024-09-22 15:32:23,853][02352] Saving new best policy, reward=7.586! +[2024-09-22 15:32:27,249][02365] Updated weights for policy 0, policy_version 290 (0.0032) +[2024-09-22 15:32:28,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3748.9). Total num frames: 1191936. Throughput: 0: 916.2. Samples: 297416. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:32:28,842][00338] Avg episode reward: [(0, '7.498')] +[2024-09-22 15:32:28,850][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000291_1191936.pth... +[2024-09-22 15:32:28,990][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000069_282624.pth +[2024-09-22 15:32:33,839][00338] Fps is (10 sec: 4097.7, 60 sec: 3823.1, 300 sec: 3776.7). Total num frames: 1216512. Throughput: 0: 946.3. Samples: 304364. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:32:33,847][00338] Avg episode reward: [(0, '7.244')] +[2024-09-22 15:32:36,196][02365] Updated weights for policy 0, policy_version 300 (0.0019) +[2024-09-22 15:32:38,840][00338] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 1236992. Throughput: 0: 978.3. Samples: 307776. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:32:38,844][00338] Avg episode reward: [(0, '7.357')] +[2024-09-22 15:32:43,842][00338] Fps is (10 sec: 3276.0, 60 sec: 3686.3, 300 sec: 3748.9). Total num frames: 1249280. Throughput: 0: 945.5. Samples: 312324. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:32:43,844][00338] Avg episode reward: [(0, '7.439')] +[2024-09-22 15:32:47,796][02365] Updated weights for policy 0, policy_version 310 (0.0048) +[2024-09-22 15:32:48,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3823.0, 300 sec: 3790.6). Total num frames: 1273856. Throughput: 0: 937.4. Samples: 318494. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:32:48,842][00338] Avg episode reward: [(0, '7.960')] +[2024-09-22 15:32:48,853][02352] Saving new best policy, reward=7.960! +[2024-09-22 15:32:53,839][00338] Fps is (10 sec: 4506.7, 60 sec: 3891.2, 300 sec: 3790.5). Total num frames: 1294336. Throughput: 0: 963.5. Samples: 321686. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:32:53,845][00338] Avg episode reward: [(0, '8.157')] +[2024-09-22 15:32:53,850][02352] Saving new best policy, reward=8.157! +[2024-09-22 15:32:58,847][00338] Fps is (10 sec: 3274.4, 60 sec: 3754.2, 300 sec: 3748.8). Total num frames: 1306624. Throughput: 0: 967.2. Samples: 327028. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:32:58,853][00338] Avg episode reward: [(0, '8.346')] +[2024-09-22 15:32:58,869][02352] Saving new best policy, reward=8.346! +[2024-09-22 15:32:59,206][02365] Updated weights for policy 0, policy_version 320 (0.0039) +[2024-09-22 15:33:03,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 1327104. Throughput: 0: 928.3. Samples: 332104. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:33:03,845][00338] Avg episode reward: [(0, '8.598')] +[2024-09-22 15:33:03,849][02352] Saving new best policy, reward=8.598! +[2024-09-22 15:33:08,839][00338] Fps is (10 sec: 4099.1, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 1347584. Throughput: 0: 941.2. Samples: 335414. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:33:08,849][00338] Avg episode reward: [(0, '8.822')] +[2024-09-22 15:33:08,860][02352] Saving new best policy, reward=8.822! +[2024-09-22 15:33:09,074][02365] Updated weights for policy 0, policy_version 330 (0.0034) +[2024-09-22 15:33:13,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 1368064. Throughput: 0: 983.4. Samples: 341670. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:33:13,844][00338] Avg episode reward: [(0, '9.428')] +[2024-09-22 15:33:13,846][02352] Saving new best policy, reward=9.428! +[2024-09-22 15:33:18,842][00338] Fps is (10 sec: 3275.9, 60 sec: 3686.2, 300 sec: 3762.7). Total num frames: 1380352. Throughput: 0: 918.6. Samples: 345704. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:33:18,847][00338] Avg episode reward: [(0, '9.738')] +[2024-09-22 15:33:18,860][02352] Saving new best policy, reward=9.738! +[2024-09-22 15:33:21,135][02365] Updated weights for policy 0, policy_version 340 (0.0033) +[2024-09-22 15:33:23,840][00338] Fps is (10 sec: 3686.3, 60 sec: 3823.2, 300 sec: 3804.4). Total num frames: 1404928. Throughput: 0: 913.4. Samples: 348880. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:33:23,846][00338] Avg episode reward: [(0, '10.152')] +[2024-09-22 15:33:23,850][02352] Saving new best policy, reward=10.152! +[2024-09-22 15:33:28,839][00338] Fps is (10 sec: 4506.8, 60 sec: 3891.2, 300 sec: 3790.5). Total num frames: 1425408. Throughput: 0: 963.6. Samples: 355682. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:33:28,849][00338] Avg episode reward: [(0, '11.169')] +[2024-09-22 15:33:28,869][02352] Saving new best policy, reward=11.169! +[2024-09-22 15:33:31,534][02365] Updated weights for policy 0, policy_version 350 (0.0029) +[2024-09-22 15:33:33,842][00338] Fps is (10 sec: 3276.9, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 1437696. Throughput: 0: 924.7. Samples: 360104. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:33:33,844][00338] Avg episode reward: [(0, '12.188')] +[2024-09-22 15:33:33,849][02352] Saving new best policy, reward=12.188! +[2024-09-22 15:33:38,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3790.6). Total num frames: 1458176. Throughput: 0: 911.5. Samples: 362704. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:33:38,844][00338] Avg episode reward: [(0, '12.282')] +[2024-09-22 15:33:38,856][02352] Saving new best policy, reward=12.282! +[2024-09-22 15:33:42,017][02365] Updated weights for policy 0, policy_version 360 (0.0020) +[2024-09-22 15:33:43,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3891.4, 300 sec: 3804.4). Total num frames: 1482752. Throughput: 0: 943.4. Samples: 369472. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:33:43,846][00338] Avg episode reward: [(0, '12.300')] +[2024-09-22 15:33:43,852][02352] Saving new best policy, reward=12.300! +[2024-09-22 15:33:48,844][00338] Fps is (10 sec: 3684.8, 60 sec: 3686.1, 300 sec: 3776.6). Total num frames: 1495040. Throughput: 0: 946.7. Samples: 374710. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:33:48,846][00338] Avg episode reward: [(0, '13.072')] +[2024-09-22 15:33:48,864][02352] Saving new best policy, reward=13.072! +[2024-09-22 15:33:53,839][00338] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3762.8). Total num frames: 1511424. Throughput: 0: 916.9. Samples: 376676. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:33:53,842][00338] Avg episode reward: [(0, '12.152')] +[2024-09-22 15:33:54,065][02365] Updated weights for policy 0, policy_version 370 (0.0026) +[2024-09-22 15:33:58,839][00338] Fps is (10 sec: 4097.7, 60 sec: 3823.4, 300 sec: 3804.4). Total num frames: 1536000. Throughput: 0: 920.0. Samples: 383068. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:33:58,842][00338] Avg episode reward: [(0, '13.438')] +[2024-09-22 15:33:58,850][02352] Saving new best policy, reward=13.438! +[2024-09-22 15:34:03,824][02365] Updated weights for policy 0, policy_version 380 (0.0036) +[2024-09-22 15:34:03,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 1556480. Throughput: 0: 969.8. Samples: 389342. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:34:03,844][00338] Avg episode reward: [(0, '14.617')] +[2024-09-22 15:34:03,847][02352] Saving new best policy, reward=14.617! +[2024-09-22 15:34:08,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3776.7). Total num frames: 1568768. Throughput: 0: 941.9. Samples: 391266. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:34:08,846][00338] Avg episode reward: [(0, '14.618')] +[2024-09-22 15:34:08,856][02352] Saving new best policy, reward=14.618! +[2024-09-22 15:34:13,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3790.5). Total num frames: 1589248. Throughput: 0: 911.6. Samples: 396704. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:34:13,841][00338] Avg episode reward: [(0, '15.683')] +[2024-09-22 15:34:13,844][02352] Saving new best policy, reward=15.683! +[2024-09-22 15:34:15,197][02365] Updated weights for policy 0, policy_version 390 (0.0028) +[2024-09-22 15:34:18,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3823.1, 300 sec: 3790.5). Total num frames: 1609728. Throughput: 0: 959.4. Samples: 403278. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:34:18,843][00338] Avg episode reward: [(0, '15.263')] +[2024-09-22 15:34:23,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 1626112. Throughput: 0: 954.7. Samples: 405664. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:34:23,842][00338] Avg episode reward: [(0, '13.975')] +[2024-09-22 15:34:27,142][02365] Updated weights for policy 0, policy_version 400 (0.0038) +[2024-09-22 15:34:28,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3776.7). Total num frames: 1642496. Throughput: 0: 905.6. Samples: 410224. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:34:28,846][00338] Avg episode reward: [(0, '14.143')] +[2024-09-22 15:34:28,856][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000401_1642496.pth... +[2024-09-22 15:34:28,973][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000180_737280.pth +[2024-09-22 15:34:33,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 1667072. Throughput: 0: 938.4. Samples: 416934. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:34:33,846][00338] Avg episode reward: [(0, '13.346')] +[2024-09-22 15:34:36,573][02365] Updated weights for policy 0, policy_version 410 (0.0026) +[2024-09-22 15:34:38,840][00338] Fps is (10 sec: 4095.9, 60 sec: 3754.6, 300 sec: 3776.6). Total num frames: 1683456. Throughput: 0: 967.7. Samples: 420222. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:34:38,842][00338] Avg episode reward: [(0, '14.623')] +[2024-09-22 15:34:43,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3762.8). Total num frames: 1699840. Throughput: 0: 915.5. Samples: 424266. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:34:43,847][00338] Avg episode reward: [(0, '14.998')] +[2024-09-22 15:34:48,096][02365] Updated weights for policy 0, policy_version 420 (0.0043) +[2024-09-22 15:34:48,839][00338] Fps is (10 sec: 3686.5, 60 sec: 3754.9, 300 sec: 3790.5). Total num frames: 1720320. Throughput: 0: 920.8. Samples: 430778. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:34:48,842][00338] Avg episode reward: [(0, '16.677')] +[2024-09-22 15:34:48,851][02352] Saving new best policy, reward=16.677! +[2024-09-22 15:34:53,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 1740800. Throughput: 0: 951.7. Samples: 434092. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:34:53,846][00338] Avg episode reward: [(0, '16.541')] +[2024-09-22 15:34:58,844][00338] Fps is (10 sec: 3684.8, 60 sec: 3686.1, 300 sec: 3762.7). Total num frames: 1757184. Throughput: 0: 934.1. Samples: 438744. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:34:58,852][00338] Avg episode reward: [(0, '16.378')] +[2024-09-22 15:35:00,171][02365] Updated weights for policy 0, policy_version 430 (0.0027) +[2024-09-22 15:35:03,850][00338] Fps is (10 sec: 3273.2, 60 sec: 3617.5, 300 sec: 3762.7). Total num frames: 1773568. Throughput: 0: 912.6. Samples: 444356. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:35:03,856][00338] Avg episode reward: [(0, '16.382')] +[2024-09-22 15:35:08,839][00338] Fps is (10 sec: 4097.7, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 1798144. Throughput: 0: 934.8. Samples: 447728. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:35:08,841][00338] Avg episode reward: [(0, '15.820')] +[2024-09-22 15:35:09,291][02365] Updated weights for policy 0, policy_version 440 (0.0023) +[2024-09-22 15:35:13,841][00338] Fps is (10 sec: 4099.7, 60 sec: 3754.6, 300 sec: 3762.7). Total num frames: 1814528. Throughput: 0: 963.9. Samples: 453602. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-09-22 15:35:13,844][00338] Avg episode reward: [(0, '15.270')] +[2024-09-22 15:35:18,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 1830912. Throughput: 0: 918.5. Samples: 458268. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:35:18,842][00338] Avg episode reward: [(0, '16.650')] +[2024-09-22 15:35:21,010][02365] Updated weights for policy 0, policy_version 450 (0.0051) +[2024-09-22 15:35:23,839][00338] Fps is (10 sec: 4096.8, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 1855488. Throughput: 0: 919.3. Samples: 461588. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:35:23,842][00338] Avg episode reward: [(0, '17.366')] +[2024-09-22 15:35:23,847][02352] Saving new best policy, reward=17.366! +[2024-09-22 15:35:28,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 1871872. Throughput: 0: 978.2. Samples: 468286. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:35:28,843][00338] Avg episode reward: [(0, '18.678')] +[2024-09-22 15:35:28,855][02352] Saving new best policy, reward=18.678! +[2024-09-22 15:35:32,397][02365] Updated weights for policy 0, policy_version 460 (0.0037) +[2024-09-22 15:35:33,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 1888256. Throughput: 0: 920.7. Samples: 472210. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:35:33,844][00338] Avg episode reward: [(0, '19.187')] +[2024-09-22 15:35:33,850][02352] Saving new best policy, reward=19.187! +[2024-09-22 15:35:38,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 1908736. Throughput: 0: 909.2. Samples: 475006. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:35:38,842][00338] Avg episode reward: [(0, '20.260')] +[2024-09-22 15:35:38,854][02352] Saving new best policy, reward=20.260! +[2024-09-22 15:35:42,329][02365] Updated weights for policy 0, policy_version 470 (0.0021) +[2024-09-22 15:35:43,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 1929216. Throughput: 0: 956.1. Samples: 481766. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:35:43,846][00338] Avg episode reward: [(0, '20.296')] +[2024-09-22 15:35:43,850][02352] Saving new best policy, reward=20.296! +[2024-09-22 15:35:48,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 1945600. Throughput: 0: 940.0. Samples: 486648. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:35:48,844][00338] Avg episode reward: [(0, '20.982')] +[2024-09-22 15:35:48,853][02352] Saving new best policy, reward=20.982! +[2024-09-22 15:35:53,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 1961984. Throughput: 0: 909.6. Samples: 488658. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:35:53,842][00338] Avg episode reward: [(0, '19.365')] +[2024-09-22 15:35:54,484][02365] Updated weights for policy 0, policy_version 480 (0.0031) +[2024-09-22 15:35:58,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.9, 300 sec: 3762.8). Total num frames: 1982464. Throughput: 0: 922.9. Samples: 495132. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:35:58,842][00338] Avg episode reward: [(0, '19.093')] +[2024-09-22 15:36:03,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3823.6, 300 sec: 3762.8). Total num frames: 2002944. Throughput: 0: 951.8. Samples: 501100. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:36:03,845][00338] Avg episode reward: [(0, '18.858')] +[2024-09-22 15:36:04,921][02365] Updated weights for policy 0, policy_version 490 (0.0029) +[2024-09-22 15:36:08,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3735.0). Total num frames: 2015232. Throughput: 0: 922.5. Samples: 503100. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:36:08,845][00338] Avg episode reward: [(0, '19.638')] +[2024-09-22 15:36:13,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.8, 300 sec: 3776.7). Total num frames: 2039808. Throughput: 0: 901.3. Samples: 508846. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:36:13,846][00338] Avg episode reward: [(0, '19.198')] +[2024-09-22 15:36:15,539][02365] Updated weights for policy 0, policy_version 500 (0.0031) +[2024-09-22 15:36:18,841][00338] Fps is (10 sec: 4504.7, 60 sec: 3822.8, 300 sec: 3762.7). Total num frames: 2060288. Throughput: 0: 964.1. Samples: 515596. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:36:18,844][00338] Avg episode reward: [(0, '18.836')] +[2024-09-22 15:36:23,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3721.1). Total num frames: 2072576. Throughput: 0: 949.1. Samples: 517716. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:36:23,842][00338] Avg episode reward: [(0, '20.531')] +[2024-09-22 15:36:27,654][02365] Updated weights for policy 0, policy_version 510 (0.0042) +[2024-09-22 15:36:28,839][00338] Fps is (10 sec: 3277.4, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 2093056. Throughput: 0: 903.8. Samples: 522436. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:36:28,842][00338] Avg episode reward: [(0, '20.714')] +[2024-09-22 15:36:28,857][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000511_2093056.pth... +[2024-09-22 15:36:28,976][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000291_1191936.pth +[2024-09-22 15:36:33,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 2113536. Throughput: 0: 942.9. Samples: 529080. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:36:33,847][00338] Avg episode reward: [(0, '20.633')] +[2024-09-22 15:36:37,242][02365] Updated weights for policy 0, policy_version 520 (0.0022) +[2024-09-22 15:36:38,843][00338] Fps is (10 sec: 4094.4, 60 sec: 3754.4, 300 sec: 3748.8). Total num frames: 2134016. Throughput: 0: 967.1. Samples: 532180. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:36:38,848][00338] Avg episode reward: [(0, '21.002')] +[2024-09-22 15:36:38,867][02352] Saving new best policy, reward=21.002! +[2024-09-22 15:36:43,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3735.0). Total num frames: 2146304. Throughput: 0: 912.9. Samples: 536212. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:36:43,847][00338] Avg episode reward: [(0, '22.038')] +[2024-09-22 15:36:43,850][02352] Saving new best policy, reward=22.038! +[2024-09-22 15:36:48,796][02365] Updated weights for policy 0, policy_version 530 (0.0029) +[2024-09-22 15:36:48,840][00338] Fps is (10 sec: 3687.8, 60 sec: 3754.6, 300 sec: 3762.8). Total num frames: 2170880. Throughput: 0: 924.2. Samples: 542690. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:36:48,842][00338] Avg episode reward: [(0, '21.295')] +[2024-09-22 15:36:53,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 2191360. Throughput: 0: 955.7. Samples: 546106. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:36:53,843][00338] Avg episode reward: [(0, '21.918')] +[2024-09-22 15:36:58,839][00338] Fps is (10 sec: 3276.9, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 2203648. Throughput: 0: 927.2. Samples: 550568. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:36:58,845][00338] Avg episode reward: [(0, '22.319')] +[2024-09-22 15:36:58,857][02352] Saving new best policy, reward=22.319! +[2024-09-22 15:37:00,954][02365] Updated weights for policy 0, policy_version 540 (0.0014) +[2024-09-22 15:37:03,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 2224128. Throughput: 0: 902.2. Samples: 556192. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:37:03,849][00338] Avg episode reward: [(0, '20.965')] +[2024-09-22 15:37:08,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3748.9). Total num frames: 2244608. Throughput: 0: 929.7. Samples: 559554. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:37:08,846][00338] Avg episode reward: [(0, '21.731')] +[2024-09-22 15:37:10,136][02365] Updated weights for policy 0, policy_version 550 (0.0030) +[2024-09-22 15:37:13,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 2260992. Throughput: 0: 949.8. Samples: 565176. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:37:13,842][00338] Avg episode reward: [(0, '21.048')] +[2024-09-22 15:37:18,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3618.3, 300 sec: 3735.0). Total num frames: 2277376. Throughput: 0: 907.6. Samples: 569922. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:37:18,842][00338] Avg episode reward: [(0, '20.025')] +[2024-09-22 15:37:21,838][02365] Updated weights for policy 0, policy_version 560 (0.0031) +[2024-09-22 15:37:23,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 2301952. Throughput: 0: 915.8. Samples: 573388. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:37:23,842][00338] Avg episode reward: [(0, '19.551')] +[2024-09-22 15:37:28,841][00338] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 2318336. Throughput: 0: 967.1. Samples: 579732. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:37:28,846][00338] Avg episode reward: [(0, '18.000')] +[2024-09-22 15:37:33,839][00338] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3707.2). Total num frames: 2330624. Throughput: 0: 912.5. Samples: 583752. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:37:33,843][00338] Avg episode reward: [(0, '17.322')] +[2024-09-22 15:37:34,104][02365] Updated weights for policy 0, policy_version 570 (0.0016) +[2024-09-22 15:37:38,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3686.7, 300 sec: 3748.9). Total num frames: 2355200. Throughput: 0: 905.0. Samples: 586832. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:37:38,847][00338] Avg episode reward: [(0, '17.904')] +[2024-09-22 15:37:42,926][02365] Updated weights for policy 0, policy_version 580 (0.0019) +[2024-09-22 15:37:43,839][00338] Fps is (10 sec: 4915.2, 60 sec: 3891.2, 300 sec: 3748.9). Total num frames: 2379776. Throughput: 0: 959.6. Samples: 593752. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:37:43,847][00338] Avg episode reward: [(0, '17.665')] +[2024-09-22 15:37:48,841][00338] Fps is (10 sec: 3685.6, 60 sec: 3686.3, 300 sec: 3721.1). Total num frames: 2392064. Throughput: 0: 941.7. Samples: 598572. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:37:48,844][00338] Avg episode reward: [(0, '18.937')] +[2024-09-22 15:37:53,839][00338] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3735.1). Total num frames: 2408448. Throughput: 0: 916.2. Samples: 600784. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:37:53,842][00338] Avg episode reward: [(0, '20.292')] +[2024-09-22 15:37:54,825][02365] Updated weights for policy 0, policy_version 590 (0.0025) +[2024-09-22 15:37:58,839][00338] Fps is (10 sec: 4096.8, 60 sec: 3822.9, 300 sec: 3748.9). Total num frames: 2433024. Throughput: 0: 940.6. Samples: 607502. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:37:58,841][00338] Avg episode reward: [(0, '19.998')] +[2024-09-22 15:38:03,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 2449408. Throughput: 0: 962.6. Samples: 613240. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:38:03,842][00338] Avg episode reward: [(0, '20.432')] +[2024-09-22 15:38:05,706][02365] Updated weights for policy 0, policy_version 600 (0.0046) +[2024-09-22 15:38:08,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 2465792. Throughput: 0: 930.7. Samples: 615268. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:38:08,842][00338] Avg episode reward: [(0, '21.815')] +[2024-09-22 15:38:13,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 2490368. Throughput: 0: 925.4. Samples: 621374. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:38:13,841][00338] Avg episode reward: [(0, '21.220')] +[2024-09-22 15:38:15,684][02365] Updated weights for policy 0, policy_version 610 (0.0029) +[2024-09-22 15:38:18,840][00338] Fps is (10 sec: 4505.5, 60 sec: 3891.2, 300 sec: 3748.9). Total num frames: 2510848. Throughput: 0: 984.4. Samples: 628052. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:38:18,846][00338] Avg episode reward: [(0, '20.758')] +[2024-09-22 15:38:23,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 2523136. Throughput: 0: 959.5. Samples: 630010. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:38:23,849][00338] Avg episode reward: [(0, '21.828')] +[2024-09-22 15:38:27,491][02365] Updated weights for policy 0, policy_version 620 (0.0048) +[2024-09-22 15:38:28,839][00338] Fps is (10 sec: 3276.9, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 2543616. Throughput: 0: 918.5. Samples: 635086. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:38:28,842][00338] Avg episode reward: [(0, '21.377')] +[2024-09-22 15:38:28,850][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000621_2543616.pth... +[2024-09-22 15:38:28,999][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000401_1642496.pth +[2024-09-22 15:38:33,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3748.9). Total num frames: 2564096. Throughput: 0: 960.5. Samples: 641792. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:38:33,841][00338] Avg episode reward: [(0, '21.466')] +[2024-09-22 15:38:37,669][02365] Updated weights for policy 0, policy_version 630 (0.0016) +[2024-09-22 15:38:38,840][00338] Fps is (10 sec: 3686.0, 60 sec: 3754.6, 300 sec: 3721.1). Total num frames: 2580480. Throughput: 0: 973.8. Samples: 644608. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:38:38,843][00338] Avg episode reward: [(0, '21.019')] +[2024-09-22 15:38:43,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3735.1). Total num frames: 2596864. Throughput: 0: 915.6. Samples: 648704. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:38:43,845][00338] Avg episode reward: [(0, '21.530')] +[2024-09-22 15:38:48,657][02365] Updated weights for policy 0, policy_version 640 (0.0024) +[2024-09-22 15:38:48,839][00338] Fps is (10 sec: 4096.5, 60 sec: 3823.1, 300 sec: 3762.8). Total num frames: 2621440. Throughput: 0: 938.4. Samples: 655466. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:38:48,842][00338] Avg episode reward: [(0, '21.919')] +[2024-09-22 15:38:53,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3748.9). Total num frames: 2641920. Throughput: 0: 968.4. Samples: 658844. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:38:53,841][00338] Avg episode reward: [(0, '21.526')] +[2024-09-22 15:38:58,840][00338] Fps is (10 sec: 3276.6, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 2654208. Throughput: 0: 931.2. Samples: 663278. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:38:58,848][00338] Avg episode reward: [(0, '22.322')] +[2024-09-22 15:38:58,859][02352] Saving new best policy, reward=22.322! +[2024-09-22 15:39:00,588][02365] Updated weights for policy 0, policy_version 650 (0.0038) +[2024-09-22 15:39:03,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 2674688. Throughput: 0: 911.3. Samples: 669062. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:39:03,845][00338] Avg episode reward: [(0, '21.743')] +[2024-09-22 15:39:08,839][00338] Fps is (10 sec: 4505.9, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 2699264. Throughput: 0: 943.7. Samples: 672476. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:39:08,845][00338] Avg episode reward: [(0, '21.966')] +[2024-09-22 15:39:09,622][02365] Updated weights for policy 0, policy_version 660 (0.0034) +[2024-09-22 15:39:13,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 2711552. Throughput: 0: 954.7. Samples: 678048. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:39:13,844][00338] Avg episode reward: [(0, '22.638')] +[2024-09-22 15:39:13,849][02352] Saving new best policy, reward=22.638! +[2024-09-22 15:39:18,839][00338] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3735.0). Total num frames: 2727936. Throughput: 0: 911.4. Samples: 682806. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:39:18,846][00338] Avg episode reward: [(0, '23.253')] +[2024-09-22 15:39:18,856][02352] Saving new best policy, reward=23.253! +[2024-09-22 15:39:21,797][02365] Updated weights for policy 0, policy_version 670 (0.0030) +[2024-09-22 15:39:23,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 2752512. Throughput: 0: 922.0. Samples: 686096. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:39:23,845][00338] Avg episode reward: [(0, '22.961')] +[2024-09-22 15:39:28,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 2768896. Throughput: 0: 967.2. Samples: 692230. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:39:28,842][00338] Avg episode reward: [(0, '22.834')] +[2024-09-22 15:39:33,772][02365] Updated weights for policy 0, policy_version 680 (0.0016) +[2024-09-22 15:39:33,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 2785280. Throughput: 0: 908.2. Samples: 696334. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:39:33,841][00338] Avg episode reward: [(0, '22.203')] +[2024-09-22 15:39:38,839][00338] Fps is (10 sec: 3686.3, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 2805760. Throughput: 0: 905.8. Samples: 699606. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:39:38,842][00338] Avg episode reward: [(0, '20.710')] +[2024-09-22 15:39:42,756][02365] Updated weights for policy 0, policy_version 690 (0.0023) +[2024-09-22 15:39:43,840][00338] Fps is (10 sec: 4505.4, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 2830336. Throughput: 0: 959.0. Samples: 706432. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:39:43,845][00338] Avg episode reward: [(0, '20.960')] +[2024-09-22 15:39:48,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 2842624. Throughput: 0: 933.5. Samples: 711068. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:39:48,842][00338] Avg episode reward: [(0, '20.446')] +[2024-09-22 15:39:53,839][00338] Fps is (10 sec: 3276.9, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 2863104. Throughput: 0: 912.8. Samples: 713550. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:39:53,845][00338] Avg episode reward: [(0, '20.966')] +[2024-09-22 15:39:54,674][02365] Updated weights for policy 0, policy_version 700 (0.0038) +[2024-09-22 15:39:58,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3823.0, 300 sec: 3762.9). Total num frames: 2883584. Throughput: 0: 938.8. Samples: 720292. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:39:58,847][00338] Avg episode reward: [(0, '22.894')] +[2024-09-22 15:40:03,843][00338] Fps is (10 sec: 3685.1, 60 sec: 3754.5, 300 sec: 3735.0). Total num frames: 2899968. Throughput: 0: 953.7. Samples: 725724. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:40:03,845][00338] Avg episode reward: [(0, '23.483')] +[2024-09-22 15:40:03,853][02352] Saving new best policy, reward=23.483! +[2024-09-22 15:40:05,930][02365] Updated weights for policy 0, policy_version 710 (0.0024) +[2024-09-22 15:40:08,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3735.0). Total num frames: 2916352. Throughput: 0: 926.0. Samples: 727764. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:40:08,844][00338] Avg episode reward: [(0, '23.948')] +[2024-09-22 15:40:08,859][02352] Saving new best policy, reward=23.948! +[2024-09-22 15:40:13,839][00338] Fps is (10 sec: 4097.4, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 2940928. Throughput: 0: 929.6. Samples: 734064. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:40:13,846][00338] Avg episode reward: [(0, '24.074')] +[2024-09-22 15:40:13,849][02352] Saving new best policy, reward=24.074! +[2024-09-22 15:40:15,722][02365] Updated weights for policy 0, policy_version 720 (0.0022) +[2024-09-22 15:40:18,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 2957312. Throughput: 0: 978.5. Samples: 740368. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:40:18,844][00338] Avg episode reward: [(0, '24.752')] +[2024-09-22 15:40:18,930][02352] Saving new best policy, reward=24.752! +[2024-09-22 15:40:23,841][00338] Fps is (10 sec: 3276.2, 60 sec: 3686.3, 300 sec: 3735.0). Total num frames: 2973696. Throughput: 0: 948.9. Samples: 742310. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:40:23,847][00338] Avg episode reward: [(0, '25.952')] +[2024-09-22 15:40:23,849][02352] Saving new best policy, reward=25.952! +[2024-09-22 15:40:27,688][02365] Updated weights for policy 0, policy_version 730 (0.0026) +[2024-09-22 15:40:28,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 2994176. Throughput: 0: 916.4. Samples: 747672. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:40:28,846][00338] Avg episode reward: [(0, '24.923')] +[2024-09-22 15:40:28,859][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000731_2994176.pth... +[2024-09-22 15:40:28,976][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000511_2093056.pth +[2024-09-22 15:40:33,839][00338] Fps is (10 sec: 4096.8, 60 sec: 3822.9, 300 sec: 3748.9). Total num frames: 3014656. Throughput: 0: 963.5. Samples: 754426. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:40:33,844][00338] Avg episode reward: [(0, '25.106')] +[2024-09-22 15:40:38,188][02365] Updated weights for policy 0, policy_version 740 (0.0030) +[2024-09-22 15:40:38,840][00338] Fps is (10 sec: 3686.2, 60 sec: 3754.6, 300 sec: 3735.0). Total num frames: 3031040. Throughput: 0: 962.6. Samples: 756868. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:40:38,843][00338] Avg episode reward: [(0, '24.266')] +[2024-09-22 15:40:43,839][00338] Fps is (10 sec: 3686.3, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 3051520. Throughput: 0: 917.7. Samples: 761588. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:40:43,841][00338] Avg episode reward: [(0, '23.866')] +[2024-09-22 15:40:48,147][02365] Updated weights for policy 0, policy_version 750 (0.0023) +[2024-09-22 15:40:48,839][00338] Fps is (10 sec: 4096.3, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 3072000. Throughput: 0: 949.9. Samples: 768464. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:40:48,846][00338] Avg episode reward: [(0, '23.736')] +[2024-09-22 15:40:53,842][00338] Fps is (10 sec: 4096.1, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 3092480. Throughput: 0: 979.8. Samples: 771856. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:40:53,845][00338] Avg episode reward: [(0, '24.382')] +[2024-09-22 15:40:58,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 3104768. Throughput: 0: 931.3. Samples: 775974. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:40:58,842][00338] Avg episode reward: [(0, '25.049')] +[2024-09-22 15:41:00,073][02365] Updated weights for policy 0, policy_version 760 (0.0013) +[2024-09-22 15:41:03,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3823.2, 300 sec: 3776.7). Total num frames: 3129344. Throughput: 0: 936.8. Samples: 782522. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:41:03,842][00338] Avg episode reward: [(0, '25.633')] +[2024-09-22 15:41:08,840][00338] Fps is (10 sec: 4505.5, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 3149824. Throughput: 0: 968.9. Samples: 785908. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:41:08,842][00338] Avg episode reward: [(0, '27.327')] +[2024-09-22 15:41:08,854][02352] Saving new best policy, reward=27.327! +[2024-09-22 15:41:09,750][02365] Updated weights for policy 0, policy_version 770 (0.0033) +[2024-09-22 15:41:13,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 3162112. Throughput: 0: 959.1. Samples: 790830. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:41:13,843][00338] Avg episode reward: [(0, '25.621')] +[2024-09-22 15:41:18,839][00338] Fps is (10 sec: 3276.9, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 3182592. Throughput: 0: 927.6. Samples: 796170. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:41:18,844][00338] Avg episode reward: [(0, '25.792')] +[2024-09-22 15:41:20,973][02365] Updated weights for policy 0, policy_version 780 (0.0038) +[2024-09-22 15:41:23,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3891.3, 300 sec: 3776.7). Total num frames: 3207168. Throughput: 0: 949.3. Samples: 799588. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:41:23,844][00338] Avg episode reward: [(0, '26.433')] +[2024-09-22 15:41:28,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 3223552. Throughput: 0: 975.7. Samples: 805494. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:41:28,847][00338] Avg episode reward: [(0, '26.497')] +[2024-09-22 15:41:32,972][02365] Updated weights for policy 0, policy_version 790 (0.0017) +[2024-09-22 15:41:33,839][00338] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 3235840. Throughput: 0: 918.4. Samples: 809794. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:41:33,845][00338] Avg episode reward: [(0, '25.442')] +[2024-09-22 15:41:38,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3823.0, 300 sec: 3776.6). Total num frames: 3260416. Throughput: 0: 920.0. Samples: 813256. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:41:38,842][00338] Avg episode reward: [(0, '24.948')] +[2024-09-22 15:41:41,958][02365] Updated weights for policy 0, policy_version 800 (0.0032) +[2024-09-22 15:41:43,840][00338] Fps is (10 sec: 4505.5, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 3280896. Throughput: 0: 981.8. Samples: 820156. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:41:43,844][00338] Avg episode reward: [(0, '26.335')] +[2024-09-22 15:41:48,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 3293184. Throughput: 0: 930.4. Samples: 824388. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:41:48,842][00338] Avg episode reward: [(0, '26.305')] +[2024-09-22 15:41:53,805][02365] Updated weights for policy 0, policy_version 810 (0.0031) +[2024-09-22 15:41:53,839][00338] Fps is (10 sec: 3686.5, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 3317760. Throughput: 0: 918.3. Samples: 827230. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:41:53,842][00338] Avg episode reward: [(0, '24.515')] +[2024-09-22 15:41:58,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 3338240. Throughput: 0: 956.4. Samples: 833866. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:41:58,845][00338] Avg episode reward: [(0, '24.959')] +[2024-09-22 15:42:03,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 3354624. Throughput: 0: 951.7. Samples: 838996. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:42:03,842][00338] Avg episode reward: [(0, '25.417')] +[2024-09-22 15:42:04,990][02365] Updated weights for policy 0, policy_version 820 (0.0039) +[2024-09-22 15:42:08,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 3371008. Throughput: 0: 922.9. Samples: 841118. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:42:08,842][00338] Avg episode reward: [(0, '25.946')] +[2024-09-22 15:42:13,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3790.5). Total num frames: 3395584. Throughput: 0: 944.5. Samples: 847998. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:42:13,847][00338] Avg episode reward: [(0, '24.203')] +[2024-09-22 15:42:14,484][02365] Updated weights for policy 0, policy_version 830 (0.0037) +[2024-09-22 15:42:18,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 3416064. Throughput: 0: 984.8. Samples: 854108. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:42:18,849][00338] Avg episode reward: [(0, '24.179')] +[2024-09-22 15:42:23,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 3428352. Throughput: 0: 954.7. Samples: 856218. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:42:23,841][00338] Avg episode reward: [(0, '23.670')] +[2024-09-22 15:42:26,027][02365] Updated weights for policy 0, policy_version 840 (0.0038) +[2024-09-22 15:42:28,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3790.5). Total num frames: 3448832. Throughput: 0: 929.8. Samples: 861996. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:42:28,842][00338] Avg episode reward: [(0, '22.334')] +[2024-09-22 15:42:28,852][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000843_3452928.pth... +[2024-09-22 15:42:28,991][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000621_2543616.pth +[2024-09-22 15:42:33,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3790.5). Total num frames: 3473408. Throughput: 0: 989.2. Samples: 868904. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:42:33,843][00338] Avg episode reward: [(0, '22.276')] +[2024-09-22 15:42:36,245][02365] Updated weights for policy 0, policy_version 850 (0.0021) +[2024-09-22 15:42:38,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 3485696. Throughput: 0: 972.8. Samples: 871004. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:42:38,842][00338] Avg episode reward: [(0, '22.951')] +[2024-09-22 15:42:43,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 3506176. Throughput: 0: 937.9. Samples: 876072. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:42:43,842][00338] Avg episode reward: [(0, '22.936')] +[2024-09-22 15:42:46,780][02365] Updated weights for policy 0, policy_version 860 (0.0023) +[2024-09-22 15:42:48,840][00338] Fps is (10 sec: 4505.5, 60 sec: 3959.5, 300 sec: 3804.4). Total num frames: 3530752. Throughput: 0: 978.4. Samples: 883024. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:42:48,843][00338] Avg episode reward: [(0, '23.697')] +[2024-09-22 15:42:53,841][00338] Fps is (10 sec: 4095.4, 60 sec: 3822.8, 300 sec: 3776.6). Total num frames: 3547136. Throughput: 0: 997.9. Samples: 886024. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:42:53,846][00338] Avg episode reward: [(0, '24.406')] +[2024-09-22 15:42:58,483][02365] Updated weights for policy 0, policy_version 870 (0.0016) +[2024-09-22 15:42:58,839][00338] Fps is (10 sec: 3276.9, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 3563520. Throughput: 0: 938.3. Samples: 890222. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:42:58,847][00338] Avg episode reward: [(0, '25.191')] +[2024-09-22 15:43:03,839][00338] Fps is (10 sec: 4096.6, 60 sec: 3891.2, 300 sec: 3804.4). Total num frames: 3588096. Throughput: 0: 951.5. Samples: 896926. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:43:03,843][00338] Avg episode reward: [(0, '25.345')] +[2024-09-22 15:43:07,275][02365] Updated weights for policy 0, policy_version 880 (0.0027) +[2024-09-22 15:43:08,843][00338] Fps is (10 sec: 4504.0, 60 sec: 3959.2, 300 sec: 3790.5). Total num frames: 3608576. Throughput: 0: 982.6. Samples: 900438. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:43:08,846][00338] Avg episode reward: [(0, '24.217')] +[2024-09-22 15:43:13,845][00338] Fps is (10 sec: 3275.0, 60 sec: 3754.3, 300 sec: 3762.7). Total num frames: 3620864. Throughput: 0: 957.3. Samples: 905080. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:43:13,847][00338] Avg episode reward: [(0, '23.009')] +[2024-09-22 15:43:18,839][00338] Fps is (10 sec: 3277.9, 60 sec: 3754.7, 300 sec: 3790.5). Total num frames: 3641344. Throughput: 0: 936.8. Samples: 911062. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:43:18,842][00338] Avg episode reward: [(0, '22.656')] +[2024-09-22 15:43:19,095][02365] Updated weights for policy 0, policy_version 890 (0.0037) +[2024-09-22 15:43:23,847][00338] Fps is (10 sec: 4504.5, 60 sec: 3958.9, 300 sec: 3804.3). Total num frames: 3665920. Throughput: 0: 967.0. Samples: 914528. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:43:23,850][00338] Avg episode reward: [(0, '22.201')] +[2024-09-22 15:43:28,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3790.5). Total num frames: 3682304. Throughput: 0: 978.1. Samples: 920086. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:43:28,848][00338] Avg episode reward: [(0, '21.341')] +[2024-09-22 15:43:30,165][02365] Updated weights for policy 0, policy_version 900 (0.0026) +[2024-09-22 15:43:33,839][00338] Fps is (10 sec: 3279.4, 60 sec: 3754.7, 300 sec: 3790.5). Total num frames: 3698688. Throughput: 0: 936.8. Samples: 925182. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:43:33,846][00338] Avg episode reward: [(0, '21.223')] +[2024-09-22 15:43:38,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3818.3). Total num frames: 3723264. Throughput: 0: 947.9. Samples: 928680. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:43:38,846][00338] Avg episode reward: [(0, '22.092')] +[2024-09-22 15:43:39,483][02365] Updated weights for policy 0, policy_version 910 (0.0025) +[2024-09-22 15:43:43,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3804.4). Total num frames: 3743744. Throughput: 0: 1004.8. Samples: 935438. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:43:43,841][00338] Avg episode reward: [(0, '22.678')] +[2024-09-22 15:43:48,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 3756032. Throughput: 0: 949.7. Samples: 939662. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:43:48,842][00338] Avg episode reward: [(0, '23.779')] +[2024-09-22 15:43:50,925][02365] Updated weights for policy 0, policy_version 920 (0.0030) +[2024-09-22 15:43:53,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3891.3, 300 sec: 3818.3). Total num frames: 3780608. Throughput: 0: 947.6. Samples: 943076. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:43:53,842][00338] Avg episode reward: [(0, '24.754')] +[2024-09-22 15:43:58,841][00338] Fps is (10 sec: 4504.7, 60 sec: 3959.3, 300 sec: 3818.3). Total num frames: 3801088. Throughput: 0: 998.5. Samples: 950010. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:43:58,847][00338] Avg episode reward: [(0, '24.423')] +[2024-09-22 15:44:00,828][02365] Updated weights for policy 0, policy_version 930 (0.0022) +[2024-09-22 15:44:03,843][00338] Fps is (10 sec: 3275.7, 60 sec: 3754.4, 300 sec: 3776.6). Total num frames: 3813376. Throughput: 0: 965.7. Samples: 954520. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:44:03,845][00338] Avg episode reward: [(0, '23.929')] +[2024-09-22 15:44:08,839][00338] Fps is (10 sec: 3277.4, 60 sec: 3754.9, 300 sec: 3804.4). Total num frames: 3833856. Throughput: 0: 944.5. Samples: 957022. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:44:08,842][00338] Avg episode reward: [(0, '23.941')] +[2024-09-22 15:44:11,453][02365] Updated weights for policy 0, policy_version 940 (0.0027) +[2024-09-22 15:44:13,839][00338] Fps is (10 sec: 4507.2, 60 sec: 3959.8, 300 sec: 3832.2). Total num frames: 3858432. Throughput: 0: 977.3. Samples: 964066. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:44:13,842][00338] Avg episode reward: [(0, '25.289')] +[2024-09-22 15:44:18,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3804.4). Total num frames: 3874816. Throughput: 0: 985.7. Samples: 969538. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:44:18,844][00338] Avg episode reward: [(0, '25.561')] +[2024-09-22 15:44:23,105][02365] Updated weights for policy 0, policy_version 950 (0.0049) +[2024-09-22 15:44:23,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3755.2, 300 sec: 3804.4). Total num frames: 3891200. Throughput: 0: 954.7. Samples: 971640. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:44:23,843][00338] Avg episode reward: [(0, '25.334')] +[2024-09-22 15:44:28,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 3915776. Throughput: 0: 951.8. Samples: 978268. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:44:28,842][00338] Avg episode reward: [(0, '24.556')] +[2024-09-22 15:44:28,850][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000956_3915776.pth... +[2024-09-22 15:44:28,971][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000731_2994176.pth +[2024-09-22 15:44:32,084][02365] Updated weights for policy 0, policy_version 960 (0.0036) +[2024-09-22 15:44:33,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3832.2). Total num frames: 3936256. Throughput: 0: 995.4. Samples: 984454. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:44:33,844][00338] Avg episode reward: [(0, '25.367')] +[2024-09-22 15:44:38,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3790.5). Total num frames: 3948544. Throughput: 0: 964.6. Samples: 986482. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:44:38,844][00338] Avg episode reward: [(0, '25.374')] +[2024-09-22 15:44:43,721][02365] Updated weights for policy 0, policy_version 970 (0.0021) +[2024-09-22 15:44:43,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 3973120. Throughput: 0: 933.9. Samples: 992036. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:44:43,842][00338] Avg episode reward: [(0, '23.943')] +[2024-09-22 15:44:48,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3832.2). Total num frames: 3993600. Throughput: 0: 989.5. Samples: 999044. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:44:48,847][00338] Avg episode reward: [(0, '24.991')] +[2024-09-22 15:44:53,842][00338] Fps is (10 sec: 3685.5, 60 sec: 3822.8, 300 sec: 3818.3). Total num frames: 4009984. Throughput: 0: 989.5. Samples: 1001554. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:44:53,851][00338] Avg episode reward: [(0, '26.841')] +[2024-09-22 15:44:54,901][02365] Updated weights for policy 0, policy_version 980 (0.0021) +[2024-09-22 15:44:58,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.8, 300 sec: 3818.3). Total num frames: 4026368. Throughput: 0: 935.6. Samples: 1006168. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:44:58,843][00338] Avg episode reward: [(0, '26.905')] +[2024-09-22 15:45:03,840][00338] Fps is (10 sec: 4096.9, 60 sec: 3959.7, 300 sec: 3846.1). Total num frames: 4050944. Throughput: 0: 967.6. Samples: 1013080. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:45:03,847][00338] Avg episode reward: [(0, '26.659')] +[2024-09-22 15:45:04,377][02365] Updated weights for policy 0, policy_version 990 (0.0025) +[2024-09-22 15:45:08,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 4067328. Throughput: 0: 996.6. Samples: 1016488. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:45:08,846][00338] Avg episode reward: [(0, '26.694')] +[2024-09-22 15:45:13,839][00338] Fps is (10 sec: 3276.9, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 4083712. Throughput: 0: 940.0. Samples: 1020566. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:45:13,846][00338] Avg episode reward: [(0, '28.101')] +[2024-09-22 15:45:13,849][02352] Saving new best policy, reward=28.101! +[2024-09-22 15:45:16,112][02365] Updated weights for policy 0, policy_version 1000 (0.0029) +[2024-09-22 15:45:18,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 4104192. Throughput: 0: 946.7. Samples: 1027054. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-09-22 15:45:18,848][00338] Avg episode reward: [(0, '25.362')] +[2024-09-22 15:45:23,839][00338] Fps is (10 sec: 4505.5, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 4128768. Throughput: 0: 978.8. Samples: 1030530. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:45:23,842][00338] Avg episode reward: [(0, '24.645')] +[2024-09-22 15:45:26,214][02365] Updated weights for policy 0, policy_version 1010 (0.0027) +[2024-09-22 15:45:28,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 4141056. Throughput: 0: 963.5. Samples: 1035392. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:45:28,842][00338] Avg episode reward: [(0, '24.622')] +[2024-09-22 15:45:33,839][00338] Fps is (10 sec: 3276.9, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 4161536. Throughput: 0: 930.9. Samples: 1040936. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:45:33,846][00338] Avg episode reward: [(0, '24.693')] +[2024-09-22 15:45:36,770][02365] Updated weights for policy 0, policy_version 1020 (0.0020) +[2024-09-22 15:45:38,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 4186112. Throughput: 0: 954.5. Samples: 1044506. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:45:38,842][00338] Avg episode reward: [(0, '21.831')] +[2024-09-22 15:45:43,842][00338] Fps is (10 sec: 4094.9, 60 sec: 3822.8, 300 sec: 3832.2). Total num frames: 4202496. Throughput: 0: 982.3. Samples: 1050376. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:45:43,846][00338] Avg episode reward: [(0, '21.685')] +[2024-09-22 15:45:48,316][02365] Updated weights for policy 0, policy_version 1030 (0.0018) +[2024-09-22 15:45:48,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 4218880. Throughput: 0: 937.8. Samples: 1055280. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:45:48,847][00338] Avg episode reward: [(0, '22.084')] +[2024-09-22 15:45:53,839][00338] Fps is (10 sec: 4097.1, 60 sec: 3891.4, 300 sec: 3860.0). Total num frames: 4243456. Throughput: 0: 939.5. Samples: 1058766. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:45:53,842][00338] Avg episode reward: [(0, '23.411')] +[2024-09-22 15:45:57,418][02365] Updated weights for policy 0, policy_version 1040 (0.0015) +[2024-09-22 15:45:58,840][00338] Fps is (10 sec: 4505.4, 60 sec: 3959.4, 300 sec: 3846.1). Total num frames: 4263936. Throughput: 0: 998.6. Samples: 1065504. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:45:58,846][00338] Avg episode reward: [(0, '23.480')] +[2024-09-22 15:46:03,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 4276224. Throughput: 0: 945.3. Samples: 1069594. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:46:03,842][00338] Avg episode reward: [(0, '23.285')] +[2024-09-22 15:46:08,839][00338] Fps is (10 sec: 3277.0, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 4296704. Throughput: 0: 936.7. Samples: 1072682. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:46:08,841][00338] Avg episode reward: [(0, '24.546')] +[2024-09-22 15:46:08,944][02365] Updated weights for policy 0, policy_version 1050 (0.0025) +[2024-09-22 15:46:13,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 4321280. Throughput: 0: 985.8. Samples: 1079752. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:46:13,842][00338] Avg episode reward: [(0, '24.498')] +[2024-09-22 15:46:18,845][00338] Fps is (10 sec: 4093.8, 60 sec: 3890.8, 300 sec: 3832.1). Total num frames: 4337664. Throughput: 0: 974.8. Samples: 1084808. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:46:18,847][00338] Avg episode reward: [(0, '24.779')] +[2024-09-22 15:46:19,926][02365] Updated weights for policy 0, policy_version 1060 (0.0018) +[2024-09-22 15:46:23,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 4358144. Throughput: 0: 945.8. Samples: 1087068. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:46:23,842][00338] Avg episode reward: [(0, '24.723')] +[2024-09-22 15:46:28,839][00338] Fps is (10 sec: 4098.2, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 4378624. Throughput: 0: 972.8. Samples: 1094150. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:46:28,846][00338] Avg episode reward: [(0, '24.971')] +[2024-09-22 15:46:28,856][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001069_4378624.pth... +[2024-09-22 15:46:28,990][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000843_3452928.pth +[2024-09-22 15:46:29,284][02365] Updated weights for policy 0, policy_version 1070 (0.0019) +[2024-09-22 15:46:33,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 4395008. Throughput: 0: 989.0. Samples: 1099784. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:46:33,844][00338] Avg episode reward: [(0, '24.165')] +[2024-09-22 15:46:38,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 4411392. Throughput: 0: 955.9. Samples: 1101782. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:46:38,841][00338] Avg episode reward: [(0, '25.500')] +[2024-09-22 15:46:41,088][02365] Updated weights for policy 0, policy_version 1080 (0.0029) +[2024-09-22 15:46:43,840][00338] Fps is (10 sec: 4095.9, 60 sec: 3891.3, 300 sec: 3873.8). Total num frames: 4435968. Throughput: 0: 941.2. Samples: 1107860. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:46:43,843][00338] Avg episode reward: [(0, '25.466')] +[2024-09-22 15:46:48,844][00338] Fps is (10 sec: 4503.6, 60 sec: 3959.2, 300 sec: 3859.9). Total num frames: 4456448. Throughput: 0: 1001.4. Samples: 1114662. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:46:48,846][00338] Avg episode reward: [(0, '25.895')] +[2024-09-22 15:46:51,279][02365] Updated weights for policy 0, policy_version 1090 (0.0041) +[2024-09-22 15:46:53,844][00338] Fps is (10 sec: 3275.4, 60 sec: 3754.4, 300 sec: 3832.1). Total num frames: 4468736. Throughput: 0: 977.9. Samples: 1116690. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:46:53,847][00338] Avg episode reward: [(0, '26.322')] +[2024-09-22 15:46:58,839][00338] Fps is (10 sec: 3278.3, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 4489216. Throughput: 0: 936.8. Samples: 1121906. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:46:58,842][00338] Avg episode reward: [(0, '25.970')] +[2024-09-22 15:47:01,736][02365] Updated weights for policy 0, policy_version 1100 (0.0022) +[2024-09-22 15:47:03,839][00338] Fps is (10 sec: 4507.6, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 4513792. Throughput: 0: 977.8. Samples: 1128804. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:47:03,845][00338] Avg episode reward: [(0, '25.974')] +[2024-09-22 15:47:08,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 4530176. Throughput: 0: 990.4. Samples: 1131634. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:47:08,844][00338] Avg episode reward: [(0, '26.032')] +[2024-09-22 15:47:13,316][02365] Updated weights for policy 0, policy_version 1110 (0.0050) +[2024-09-22 15:47:13,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 4546560. Throughput: 0: 930.5. Samples: 1136024. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:47:13,841][00338] Avg episode reward: [(0, '24.220')] +[2024-09-22 15:47:18,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.6, 300 sec: 3873.8). Total num frames: 4571136. Throughput: 0: 959.3. Samples: 1142952. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:47:18,842][00338] Avg episode reward: [(0, '23.942')] +[2024-09-22 15:47:22,326][02365] Updated weights for policy 0, policy_version 1120 (0.0033) +[2024-09-22 15:47:23,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 4591616. Throughput: 0: 990.5. Samples: 1146356. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:47:23,842][00338] Avg episode reward: [(0, '22.853')] +[2024-09-22 15:47:28,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 4603904. Throughput: 0: 952.6. Samples: 1150726. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:47:28,847][00338] Avg episode reward: [(0, '22.471')] +[2024-09-22 15:47:33,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 4624384. Throughput: 0: 938.6. Samples: 1156894. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:47:33,841][00338] Avg episode reward: [(0, '21.920')] +[2024-09-22 15:47:33,904][02365] Updated weights for policy 0, policy_version 1130 (0.0034) +[2024-09-22 15:47:38,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 4648960. Throughput: 0: 971.6. Samples: 1160408. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:47:38,842][00338] Avg episode reward: [(0, '22.636')] +[2024-09-22 15:47:43,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 4661248. Throughput: 0: 974.0. Samples: 1165734. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:47:43,841][00338] Avg episode reward: [(0, '22.844')] +[2024-09-22 15:47:45,646][02365] Updated weights for policy 0, policy_version 1140 (0.0023) +[2024-09-22 15:47:48,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.9, 300 sec: 3846.1). Total num frames: 4681728. Throughput: 0: 939.6. Samples: 1171084. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:47:48,846][00338] Avg episode reward: [(0, '22.251')] +[2024-09-22 15:47:53,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.8, 300 sec: 3873.8). Total num frames: 4706304. Throughput: 0: 956.5. Samples: 1174676. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:47:53,845][00338] Avg episode reward: [(0, '24.751')] +[2024-09-22 15:47:54,155][02365] Updated weights for policy 0, policy_version 1150 (0.0027) +[2024-09-22 15:47:58,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 4722688. Throughput: 0: 996.8. Samples: 1180880. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:47:58,842][00338] Avg episode reward: [(0, '24.637')] +[2024-09-22 15:48:03,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 4739072. Throughput: 0: 940.3. Samples: 1185266. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:48:03,847][00338] Avg episode reward: [(0, '25.934')] +[2024-09-22 15:48:06,042][02365] Updated weights for policy 0, policy_version 1160 (0.0051) +[2024-09-22 15:48:08,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3873.9). Total num frames: 4763648. Throughput: 0: 941.5. Samples: 1188722. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:48:08,847][00338] Avg episode reward: [(0, '26.302')] +[2024-09-22 15:48:13,839][00338] Fps is (10 sec: 4505.5, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 4784128. Throughput: 0: 999.6. Samples: 1195710. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:48:13,841][00338] Avg episode reward: [(0, '26.414')] +[2024-09-22 15:48:16,168][02365] Updated weights for policy 0, policy_version 1170 (0.0024) +[2024-09-22 15:48:18,847][00338] Fps is (10 sec: 3274.4, 60 sec: 3754.2, 300 sec: 3832.2). Total num frames: 4796416. Throughput: 0: 957.1. Samples: 1199970. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:48:18,853][00338] Avg episode reward: [(0, '27.706')] +[2024-09-22 15:48:23,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 4820992. Throughput: 0: 944.0. Samples: 1202886. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:48:23,842][00338] Avg episode reward: [(0, '26.611')] +[2024-09-22 15:48:26,367][02365] Updated weights for policy 0, policy_version 1180 (0.0021) +[2024-09-22 15:48:28,839][00338] Fps is (10 sec: 4508.9, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 4841472. Throughput: 0: 980.8. Samples: 1209868. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:48:28,846][00338] Avg episode reward: [(0, '27.605')] +[2024-09-22 15:48:28,916][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001183_4845568.pth... +[2024-09-22 15:48:29,037][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000956_3915776.pth +[2024-09-22 15:48:33,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 4857856. Throughput: 0: 973.7. Samples: 1214900. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:48:33,842][00338] Avg episode reward: [(0, '27.598')] +[2024-09-22 15:48:38,356][02365] Updated weights for policy 0, policy_version 1190 (0.0026) +[2024-09-22 15:48:38,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 4874240. Throughput: 0: 940.2. Samples: 1216984. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:48:38,847][00338] Avg episode reward: [(0, '28.541')] +[2024-09-22 15:48:38,855][02352] Saving new best policy, reward=28.541! +[2024-09-22 15:48:43,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 4898816. Throughput: 0: 952.3. Samples: 1223732. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:48:43,846][00338] Avg episode reward: [(0, '27.429')] +[2024-09-22 15:48:47,501][02365] Updated weights for policy 0, policy_version 1200 (0.0026) +[2024-09-22 15:48:48,842][00338] Fps is (10 sec: 4504.6, 60 sec: 3959.3, 300 sec: 3859.9). Total num frames: 4919296. Throughput: 0: 992.2. Samples: 1229916. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:48:48,846][00338] Avg episode reward: [(0, '26.557')] +[2024-09-22 15:48:53,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 4931584. Throughput: 0: 960.7. Samples: 1231954. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:48:53,844][00338] Avg episode reward: [(0, '26.408')] +[2024-09-22 15:48:58,751][02365] Updated weights for policy 0, policy_version 1210 (0.0032) +[2024-09-22 15:48:58,839][00338] Fps is (10 sec: 3687.2, 60 sec: 3891.2, 300 sec: 3873.9). Total num frames: 4956160. Throughput: 0: 938.0. Samples: 1237918. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:48:58,843][00338] Avg episode reward: [(0, '25.862')] +[2024-09-22 15:49:03,840][00338] Fps is (10 sec: 4505.1, 60 sec: 3959.4, 300 sec: 3873.8). Total num frames: 4976640. Throughput: 0: 994.8. Samples: 1244730. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:49:03,845][00338] Avg episode reward: [(0, '24.313')] +[2024-09-22 15:49:08,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 4988928. Throughput: 0: 979.5. Samples: 1246964. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:49:08,845][00338] Avg episode reward: [(0, '25.503')] +[2024-09-22 15:49:10,376][02365] Updated weights for policy 0, policy_version 1220 (0.0013) +[2024-09-22 15:49:13,839][00338] Fps is (10 sec: 3277.1, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 5009408. Throughput: 0: 935.3. Samples: 1251956. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:49:13,847][00338] Avg episode reward: [(0, '25.778')] +[2024-09-22 15:49:18,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3960.0, 300 sec: 3873.8). Total num frames: 5033984. Throughput: 0: 979.1. Samples: 1258960. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:49:18,847][00338] Avg episode reward: [(0, '25.932')] +[2024-09-22 15:49:19,321][02365] Updated weights for policy 0, policy_version 1230 (0.0032) +[2024-09-22 15:49:23,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 5050368. Throughput: 0: 1001.4. Samples: 1262046. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:49:23,844][00338] Avg episode reward: [(0, '24.164')] +[2024-09-22 15:49:28,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 5066752. Throughput: 0: 945.7. Samples: 1266290. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:49:28,848][00338] Avg episode reward: [(0, '23.159')] +[2024-09-22 15:49:30,779][02365] Updated weights for policy 0, policy_version 1240 (0.0026) +[2024-09-22 15:49:33,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 5091328. Throughput: 0: 956.8. Samples: 1272972. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:49:33,846][00338] Avg episode reward: [(0, '22.880')] +[2024-09-22 15:49:38,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 5111808. Throughput: 0: 988.9. Samples: 1276454. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:49:38,842][00338] Avg episode reward: [(0, '21.987')] +[2024-09-22 15:49:41,452][02365] Updated weights for policy 0, policy_version 1250 (0.0031) +[2024-09-22 15:49:43,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 5124096. Throughput: 0: 959.0. Samples: 1281072. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:49:43,846][00338] Avg episode reward: [(0, '22.300')] +[2024-09-22 15:49:48,840][00338] Fps is (10 sec: 3686.3, 60 sec: 3823.1, 300 sec: 3860.0). Total num frames: 5148672. Throughput: 0: 943.7. Samples: 1287196. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:49:48,848][00338] Avg episode reward: [(0, '22.491')] +[2024-09-22 15:49:51,375][02365] Updated weights for policy 0, policy_version 1260 (0.0017) +[2024-09-22 15:49:53,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 5169152. Throughput: 0: 972.1. Samples: 1290710. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:49:53,842][00338] Avg episode reward: [(0, '22.279')] +[2024-09-22 15:49:58,839][00338] Fps is (10 sec: 3686.5, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 5185536. Throughput: 0: 983.6. Samples: 1296218. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:49:58,843][00338] Avg episode reward: [(0, '22.594')] +[2024-09-22 15:50:03,021][02365] Updated weights for policy 0, policy_version 1270 (0.0015) +[2024-09-22 15:50:03,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 5201920. Throughput: 0: 941.6. Samples: 1301334. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:50:03,844][00338] Avg episode reward: [(0, '22.053')] +[2024-09-22 15:50:08,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 5226496. Throughput: 0: 950.0. Samples: 1304796. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:50:08,846][00338] Avg episode reward: [(0, '24.005')] +[2024-09-22 15:50:12,416][02365] Updated weights for policy 0, policy_version 1280 (0.0031) +[2024-09-22 15:50:13,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 5246976. Throughput: 0: 997.3. Samples: 1311168. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:50:13,843][00338] Avg episode reward: [(0, '24.701')] +[2024-09-22 15:50:18,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 5259264. Throughput: 0: 945.6. Samples: 1315526. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:50:18,844][00338] Avg episode reward: [(0, '25.818')] +[2024-09-22 15:50:23,433][02365] Updated weights for policy 0, policy_version 1290 (0.0019) +[2024-09-22 15:50:23,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 5283840. Throughput: 0: 946.9. Samples: 1319066. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:50:23,847][00338] Avg episode reward: [(0, '26.601')] +[2024-09-22 15:50:28,845][00338] Fps is (10 sec: 4503.2, 60 sec: 3959.1, 300 sec: 3873.8). Total num frames: 5304320. Throughput: 0: 995.9. Samples: 1325892. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:50:28,847][00338] Avg episode reward: [(0, '26.920')] +[2024-09-22 15:50:28,855][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001295_5304320.pth... +[2024-09-22 15:50:29,070][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001069_4378624.pth +[2024-09-22 15:50:33,841][00338] Fps is (10 sec: 3276.3, 60 sec: 3754.6, 300 sec: 3832.2). Total num frames: 5316608. Throughput: 0: 956.1. Samples: 1330220. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:50:33,848][00338] Avg episode reward: [(0, '26.971')] +[2024-09-22 15:50:35,233][02365] Updated weights for policy 0, policy_version 1300 (0.0036) +[2024-09-22 15:50:38,839][00338] Fps is (10 sec: 3688.4, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 5341184. Throughput: 0: 938.5. Samples: 1332942. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:50:38,844][00338] Avg episode reward: [(0, '27.082')] +[2024-09-22 15:50:43,839][00338] Fps is (10 sec: 4506.3, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 5361664. Throughput: 0: 970.9. Samples: 1339908. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:50:43,846][00338] Avg episode reward: [(0, '28.731')] +[2024-09-22 15:50:43,851][02352] Saving new best policy, reward=28.731! +[2024-09-22 15:50:44,266][02365] Updated weights for policy 0, policy_version 1310 (0.0029) +[2024-09-22 15:50:48,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3823.0, 300 sec: 3846.1). Total num frames: 5378048. Throughput: 0: 972.7. Samples: 1345106. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:50:48,845][00338] Avg episode reward: [(0, '27.914')] +[2024-09-22 15:50:53,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 5394432. Throughput: 0: 941.1. Samples: 1347146. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:50:53,845][00338] Avg episode reward: [(0, '27.807')] +[2024-09-22 15:50:55,777][02365] Updated weights for policy 0, policy_version 1320 (0.0039) +[2024-09-22 15:50:58,841][00338] Fps is (10 sec: 4095.4, 60 sec: 3891.1, 300 sec: 3873.8). Total num frames: 5419008. Throughput: 0: 948.5. Samples: 1353854. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:50:58,846][00338] Avg episode reward: [(0, '27.102')] +[2024-09-22 15:51:03,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 5435392. Throughput: 0: 989.8. Samples: 1360066. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:51:03,843][00338] Avg episode reward: [(0, '27.408')] +[2024-09-22 15:51:06,826][02365] Updated weights for policy 0, policy_version 1330 (0.0029) +[2024-09-22 15:51:08,843][00338] Fps is (10 sec: 3276.2, 60 sec: 3754.5, 300 sec: 3832.1). Total num frames: 5451776. Throughput: 0: 956.1. Samples: 1362094. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:51:08,851][00338] Avg episode reward: [(0, '26.276')] +[2024-09-22 15:51:13,840][00338] Fps is (10 sec: 3686.2, 60 sec: 3754.6, 300 sec: 3846.1). Total num frames: 5472256. Throughput: 0: 931.3. Samples: 1367798. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:51:13,848][00338] Avg episode reward: [(0, '24.708')] +[2024-09-22 15:51:16,529][02365] Updated weights for policy 0, policy_version 1340 (0.0021) +[2024-09-22 15:51:18,839][00338] Fps is (10 sec: 4507.1, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 5496832. Throughput: 0: 991.2. Samples: 1374822. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:51:18,842][00338] Avg episode reward: [(0, '25.876')] +[2024-09-22 15:51:23,840][00338] Fps is (10 sec: 4096.1, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 5513216. Throughput: 0: 984.3. Samples: 1377238. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:51:23,848][00338] Avg episode reward: [(0, '25.140')] +[2024-09-22 15:51:28,119][02365] Updated weights for policy 0, policy_version 1350 (0.0028) +[2024-09-22 15:51:28,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3755.0, 300 sec: 3846.1). Total num frames: 5529600. Throughput: 0: 935.8. Samples: 1382018. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:51:28,842][00338] Avg episode reward: [(0, '25.539')] +[2024-09-22 15:51:33,839][00338] Fps is (10 sec: 4096.1, 60 sec: 3959.6, 300 sec: 3873.8). Total num frames: 5554176. Throughput: 0: 974.2. Samples: 1388946. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:51:33,841][00338] Avg episode reward: [(0, '25.036')] +[2024-09-22 15:51:37,671][02365] Updated weights for policy 0, policy_version 1360 (0.0022) +[2024-09-22 15:51:38,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 5570560. Throughput: 0: 999.6. Samples: 1392130. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:51:38,845][00338] Avg episode reward: [(0, '25.885')] +[2024-09-22 15:51:43,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 5586944. Throughput: 0: 943.2. Samples: 1396298. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:51:43,843][00338] Avg episode reward: [(0, '25.270')] +[2024-09-22 15:51:48,816][02365] Updated weights for policy 0, policy_version 1370 (0.0034) +[2024-09-22 15:51:48,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3873.9). Total num frames: 5611520. Throughput: 0: 952.8. Samples: 1402944. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:51:48,843][00338] Avg episode reward: [(0, '25.207')] +[2024-09-22 15:51:53,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 5632000. Throughput: 0: 984.8. Samples: 1406408. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:51:53,844][00338] Avg episode reward: [(0, '26.145')] +[2024-09-22 15:51:58,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.8, 300 sec: 3832.2). Total num frames: 5644288. Throughput: 0: 967.6. Samples: 1411338. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:51:58,841][00338] Avg episode reward: [(0, '26.708')] +[2024-09-22 15:52:00,395][02365] Updated weights for policy 0, policy_version 1380 (0.0036) +[2024-09-22 15:52:03,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 5664768. Throughput: 0: 935.8. Samples: 1416934. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:52:03,847][00338] Avg episode reward: [(0, '25.863')] +[2024-09-22 15:52:08,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.7, 300 sec: 3873.8). Total num frames: 5689344. Throughput: 0: 959.6. Samples: 1420420. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-09-22 15:52:08,842][00338] Avg episode reward: [(0, '26.162')] +[2024-09-22 15:52:09,305][02365] Updated weights for policy 0, policy_version 1390 (0.0026) +[2024-09-22 15:52:13,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 5705728. Throughput: 0: 984.9. Samples: 1426338. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:52:13,844][00338] Avg episode reward: [(0, '25.705')] +[2024-09-22 15:52:18,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 5722112. Throughput: 0: 937.4. Samples: 1431130. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:52:18,842][00338] Avg episode reward: [(0, '26.444')] +[2024-09-22 15:52:20,961][02365] Updated weights for policy 0, policy_version 1400 (0.0031) +[2024-09-22 15:52:23,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 5746688. Throughput: 0: 944.1. Samples: 1434616. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:52:23,842][00338] Avg episode reward: [(0, '25.719')] +[2024-09-22 15:52:28,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 5763072. Throughput: 0: 1001.2. Samples: 1441350. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:52:28,844][00338] Avg episode reward: [(0, '26.889')] +[2024-09-22 15:52:28,948][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001408_5767168.pth... +[2024-09-22 15:52:29,086][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001183_4845568.pth +[2024-09-22 15:52:31,989][02365] Updated weights for policy 0, policy_version 1410 (0.0034) +[2024-09-22 15:52:33,839][00338] Fps is (10 sec: 3276.7, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 5779456. Throughput: 0: 943.3. Samples: 1445392. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:52:33,845][00338] Avg episode reward: [(0, '27.698')] +[2024-09-22 15:52:38,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 5799936. Throughput: 0: 934.2. Samples: 1448448. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:52:38,847][00338] Avg episode reward: [(0, '28.958')] +[2024-09-22 15:52:38,883][02352] Saving new best policy, reward=28.958! +[2024-09-22 15:52:41,673][02365] Updated weights for policy 0, policy_version 1420 (0.0025) +[2024-09-22 15:52:43,839][00338] Fps is (10 sec: 4505.7, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 5824512. Throughput: 0: 977.3. Samples: 1455318. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:52:43,846][00338] Avg episode reward: [(0, '29.876')] +[2024-09-22 15:52:43,852][02352] Saving new best policy, reward=29.876! +[2024-09-22 15:52:48,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 5836800. Throughput: 0: 960.4. Samples: 1460154. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:52:48,841][00338] Avg episode reward: [(0, '30.492')] +[2024-09-22 15:52:48,848][02352] Saving new best policy, reward=30.492! +[2024-09-22 15:52:53,558][02365] Updated weights for policy 0, policy_version 1430 (0.0016) +[2024-09-22 15:52:53,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 5857280. Throughput: 0: 931.8. Samples: 1462350. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:52:53,842][00338] Avg episode reward: [(0, '30.174')] +[2024-09-22 15:52:58,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 5881856. Throughput: 0: 952.7. Samples: 1469208. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:52:58,846][00338] Avg episode reward: [(0, '29.841')] +[2024-09-22 15:53:03,332][02365] Updated weights for policy 0, policy_version 1440 (0.0040) +[2024-09-22 15:53:03,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 5898240. Throughput: 0: 977.2. Samples: 1475106. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:53:03,843][00338] Avg episode reward: [(0, '28.492')] +[2024-09-22 15:53:08,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 5914624. Throughput: 0: 943.7. Samples: 1477082. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:53:08,841][00338] Avg episode reward: [(0, '28.378')] +[2024-09-22 15:53:13,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3860.1). Total num frames: 5935104. Throughput: 0: 932.3. Samples: 1483302. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:53:13,847][00338] Avg episode reward: [(0, '26.891')] +[2024-09-22 15:53:13,962][02365] Updated weights for policy 0, policy_version 1450 (0.0030) +[2024-09-22 15:53:18,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 5959680. Throughput: 0: 992.7. Samples: 1490064. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:53:18,844][00338] Avg episode reward: [(0, '25.907')] +[2024-09-22 15:53:23,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 5971968. Throughput: 0: 970.9. Samples: 1492140. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:53:23,844][00338] Avg episode reward: [(0, '25.435')] +[2024-09-22 15:53:25,663][02365] Updated weights for policy 0, policy_version 1460 (0.0030) +[2024-09-22 15:53:28,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 5992448. Throughput: 0: 934.6. Samples: 1497374. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:53:28,843][00338] Avg episode reward: [(0, '24.657')] +[2024-09-22 15:53:33,840][00338] Fps is (10 sec: 4505.5, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 6017024. Throughput: 0: 981.7. Samples: 1504330. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:53:33,842][00338] Avg episode reward: [(0, '26.586')] +[2024-09-22 15:53:34,705][02365] Updated weights for policy 0, policy_version 1470 (0.0013) +[2024-09-22 15:53:38,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 6029312. Throughput: 0: 994.8. Samples: 1507116. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:53:38,843][00338] Avg episode reward: [(0, '26.017')] +[2024-09-22 15:53:43,839][00338] Fps is (10 sec: 3276.9, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 6049792. Throughput: 0: 938.7. Samples: 1511450. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:53:43,844][00338] Avg episode reward: [(0, '26.007')] +[2024-09-22 15:53:46,217][02365] Updated weights for policy 0, policy_version 1480 (0.0040) +[2024-09-22 15:53:48,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 6070272. Throughput: 0: 963.2. Samples: 1518450. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:53:48,842][00338] Avg episode reward: [(0, '27.049')] +[2024-09-22 15:53:53,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 6090752. Throughput: 0: 996.7. Samples: 1521932. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:53:53,842][00338] Avg episode reward: [(0, '27.243')] +[2024-09-22 15:53:57,304][02365] Updated weights for policy 0, policy_version 1490 (0.0036) +[2024-09-22 15:53:58,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 6107136. Throughput: 0: 955.0. Samples: 1526278. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:53:58,846][00338] Avg episode reward: [(0, '27.224')] +[2024-09-22 15:54:03,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 6127616. Throughput: 0: 946.8. Samples: 1532672. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:54:03,843][00338] Avg episode reward: [(0, '26.205')] +[2024-09-22 15:54:06,747][02365] Updated weights for policy 0, policy_version 1500 (0.0033) +[2024-09-22 15:54:08,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 6152192. Throughput: 0: 974.8. Samples: 1536008. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:54:08,847][00338] Avg episode reward: [(0, '26.740')] +[2024-09-22 15:54:13,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 6164480. Throughput: 0: 975.1. Samples: 1541254. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:54:13,853][00338] Avg episode reward: [(0, '27.168')] +[2024-09-22 15:54:18,575][02365] Updated weights for policy 0, policy_version 1510 (0.0021) +[2024-09-22 15:54:18,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 6184960. Throughput: 0: 940.3. Samples: 1546642. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:54:18,846][00338] Avg episode reward: [(0, '26.135')] +[2024-09-22 15:54:23,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 6209536. Throughput: 0: 957.0. Samples: 1550180. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:54:23,842][00338] Avg episode reward: [(0, '26.723')] +[2024-09-22 15:54:28,323][02365] Updated weights for policy 0, policy_version 1520 (0.0030) +[2024-09-22 15:54:28,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 6225920. Throughput: 0: 999.8. Samples: 1556440. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:54:28,843][00338] Avg episode reward: [(0, '27.461')] +[2024-09-22 15:54:28,852][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001520_6225920.pth... +[2024-09-22 15:54:29,014][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001295_5304320.pth +[2024-09-22 15:54:33,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 6242304. Throughput: 0: 940.4. Samples: 1560766. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:54:33,846][00338] Avg episode reward: [(0, '29.442')] +[2024-09-22 15:54:38,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 6262784. Throughput: 0: 938.0. Samples: 1564144. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:54:38,844][00338] Avg episode reward: [(0, '29.477')] +[2024-09-22 15:54:38,953][02365] Updated weights for policy 0, policy_version 1530 (0.0040) +[2024-09-22 15:54:43,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 6287360. Throughput: 0: 996.8. Samples: 1571134. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:54:43,843][00338] Avg episode reward: [(0, '29.971')] +[2024-09-22 15:54:48,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 6299648. Throughput: 0: 951.6. Samples: 1575494. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:54:48,843][00338] Avg episode reward: [(0, '30.781')] +[2024-09-22 15:54:48,865][02352] Saving new best policy, reward=30.781! +[2024-09-22 15:54:50,536][02365] Updated weights for policy 0, policy_version 1540 (0.0028) +[2024-09-22 15:54:53,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 6320128. Throughput: 0: 941.4. Samples: 1578372. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:54:53,845][00338] Avg episode reward: [(0, '31.036')] +[2024-09-22 15:54:53,848][02352] Saving new best policy, reward=31.036! +[2024-09-22 15:54:58,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 6344704. Throughput: 0: 977.6. Samples: 1585248. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:54:58,849][00338] Avg episode reward: [(0, '31.565')] +[2024-09-22 15:54:58,860][02352] Saving new best policy, reward=31.565! +[2024-09-22 15:54:59,461][02365] Updated weights for policy 0, policy_version 1550 (0.0020) +[2024-09-22 15:55:03,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 6356992. Throughput: 0: 972.6. Samples: 1590408. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:55:03,848][00338] Avg episode reward: [(0, '32.025')] +[2024-09-22 15:55:03,887][02352] Saving new best policy, reward=32.025! +[2024-09-22 15:55:08,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 6377472. Throughput: 0: 936.1. Samples: 1592304. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:55:08,841][00338] Avg episode reward: [(0, '31.580')] +[2024-09-22 15:55:11,391][02365] Updated weights for policy 0, policy_version 1560 (0.0017) +[2024-09-22 15:55:13,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 6397952. Throughput: 0: 946.0. Samples: 1599008. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:55:13,842][00338] Avg episode reward: [(0, '32.826')] +[2024-09-22 15:55:13,844][02352] Saving new best policy, reward=32.826! +[2024-09-22 15:55:18,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 6418432. Throughput: 0: 987.5. Samples: 1605204. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:55:18,843][00338] Avg episode reward: [(0, '33.076')] +[2024-09-22 15:55:18,853][02352] Saving new best policy, reward=33.076! +[2024-09-22 15:55:22,920][02365] Updated weights for policy 0, policy_version 1570 (0.0041) +[2024-09-22 15:55:23,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3818.4). Total num frames: 6430720. Throughput: 0: 955.4. Samples: 1607136. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:55:23,846][00338] Avg episode reward: [(0, '33.056')] +[2024-09-22 15:55:28,842][00338] Fps is (10 sec: 3685.5, 60 sec: 3822.8, 300 sec: 3859.9). Total num frames: 6455296. Throughput: 0: 931.9. Samples: 1613074. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:55:28,844][00338] Avg episode reward: [(0, '31.093')] +[2024-09-22 15:55:32,065][02365] Updated weights for policy 0, policy_version 1580 (0.0025) +[2024-09-22 15:55:33,843][00338] Fps is (10 sec: 4504.1, 60 sec: 3891.0, 300 sec: 3846.0). Total num frames: 6475776. Throughput: 0: 986.7. Samples: 1619900. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:55:33,851][00338] Avg episode reward: [(0, '29.015')] +[2024-09-22 15:55:38,840][00338] Fps is (10 sec: 3687.2, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 6492160. Throughput: 0: 971.0. Samples: 1622068. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:55:38,843][00338] Avg episode reward: [(0, '28.312')] +[2024-09-22 15:55:43,661][02365] Updated weights for policy 0, policy_version 1590 (0.0024) +[2024-09-22 15:55:43,839][00338] Fps is (10 sec: 3687.7, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 6512640. Throughput: 0: 928.0. Samples: 1627010. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:55:43,842][00338] Avg episode reward: [(0, '28.615')] +[2024-09-22 15:55:48,840][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 6533120. Throughput: 0: 967.9. Samples: 1633964. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:55:48,847][00338] Avg episode reward: [(0, '28.279')] +[2024-09-22 15:55:53,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 6549504. Throughput: 0: 995.8. Samples: 1637116. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:55:53,845][00338] Avg episode reward: [(0, '27.685')] +[2024-09-22 15:55:54,026][02365] Updated weights for policy 0, policy_version 1600 (0.0020) +[2024-09-22 15:55:58,839][00338] Fps is (10 sec: 3686.5, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 6569984. Throughput: 0: 938.4. Samples: 1641234. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:55:58,842][00338] Avg episode reward: [(0, '29.294')] +[2024-09-22 15:56:03,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 6590464. Throughput: 0: 953.4. Samples: 1648106. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:56:03,841][00338] Avg episode reward: [(0, '29.220')] +[2024-09-22 15:56:04,102][02365] Updated weights for policy 0, policy_version 1610 (0.0024) +[2024-09-22 15:56:08,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 6610944. Throughput: 0: 984.1. Samples: 1651420. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:56:08,845][00338] Avg episode reward: [(0, '28.204')] +[2024-09-22 15:56:13,847][00338] Fps is (10 sec: 3274.2, 60 sec: 3754.2, 300 sec: 3818.2). Total num frames: 6623232. Throughput: 0: 954.4. Samples: 1656028. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:56:13,850][00338] Avg episode reward: [(0, '27.243')] +[2024-09-22 15:56:15,993][02365] Updated weights for policy 0, policy_version 1620 (0.0028) +[2024-09-22 15:56:18,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 6647808. Throughput: 0: 937.0. Samples: 1662060. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:56:18,843][00338] Avg episode reward: [(0, '26.851')] +[2024-09-22 15:56:23,839][00338] Fps is (10 sec: 4509.2, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 6668288. Throughput: 0: 964.8. Samples: 1665484. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:56:23,842][00338] Avg episode reward: [(0, '26.709')] +[2024-09-22 15:56:25,173][02365] Updated weights for policy 0, policy_version 1630 (0.0028) +[2024-09-22 15:56:28,840][00338] Fps is (10 sec: 3686.3, 60 sec: 3823.1, 300 sec: 3832.2). Total num frames: 6684672. Throughput: 0: 977.5. Samples: 1670998. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:56:28,844][00338] Avg episode reward: [(0, '26.404')] +[2024-09-22 15:56:28,855][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001632_6684672.pth... +[2024-09-22 15:56:29,005][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001408_5767168.pth +[2024-09-22 15:56:33,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3823.2, 300 sec: 3846.1). Total num frames: 6705152. Throughput: 0: 936.1. Samples: 1676090. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:56:33,844][00338] Avg episode reward: [(0, '25.980')] +[2024-09-22 15:56:36,564][02365] Updated weights for policy 0, policy_version 1640 (0.0032) +[2024-09-22 15:56:38,839][00338] Fps is (10 sec: 4096.1, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 6725632. Throughput: 0: 941.5. Samples: 1679484. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:56:38,842][00338] Avg episode reward: [(0, '27.544')] +[2024-09-22 15:56:43,841][00338] Fps is (10 sec: 3685.9, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 6742016. Throughput: 0: 991.5. Samples: 1685852. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:56:43,844][00338] Avg episode reward: [(0, '27.599')] +[2024-09-22 15:56:48,387][02365] Updated weights for policy 0, policy_version 1650 (0.0025) +[2024-09-22 15:56:48,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 6758400. Throughput: 0: 932.4. Samples: 1690066. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:56:48,848][00338] Avg episode reward: [(0, '28.497')] +[2024-09-22 15:56:53,839][00338] Fps is (10 sec: 4096.5, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 6782976. Throughput: 0: 936.5. Samples: 1693564. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:56:53,847][00338] Avg episode reward: [(0, '29.723')] +[2024-09-22 15:56:57,229][02365] Updated weights for policy 0, policy_version 1660 (0.0014) +[2024-09-22 15:56:58,847][00338] Fps is (10 sec: 4502.2, 60 sec: 3890.7, 300 sec: 3859.9). Total num frames: 6803456. Throughput: 0: 987.8. Samples: 1700480. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:56:58,849][00338] Avg episode reward: [(0, '29.903')] +[2024-09-22 15:57:03,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 6815744. Throughput: 0: 954.4. Samples: 1705008. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:57:03,842][00338] Avg episode reward: [(0, '31.017')] +[2024-09-22 15:57:08,839][00338] Fps is (10 sec: 3279.2, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 6836224. Throughput: 0: 935.1. Samples: 1707564. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:57:08,848][00338] Avg episode reward: [(0, '30.339')] +[2024-09-22 15:57:09,001][02365] Updated weights for policy 0, policy_version 1670 (0.0038) +[2024-09-22 15:57:13,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3960.0, 300 sec: 3860.0). Total num frames: 6860800. Throughput: 0: 965.9. Samples: 1714462. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:57:13,842][00338] Avg episode reward: [(0, '29.405')] +[2024-09-22 15:57:18,840][00338] Fps is (10 sec: 4095.8, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 6877184. Throughput: 0: 972.9. Samples: 1719870. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:57:18,842][00338] Avg episode reward: [(0, '28.626')] +[2024-09-22 15:57:19,805][02365] Updated weights for policy 0, policy_version 1680 (0.0021) +[2024-09-22 15:57:23,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 6893568. Throughput: 0: 943.6. Samples: 1721948. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:57:23,842][00338] Avg episode reward: [(0, '26.692')] +[2024-09-22 15:57:28,839][00338] Fps is (10 sec: 4096.2, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 6918144. Throughput: 0: 949.6. Samples: 1728582. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:57:28,842][00338] Avg episode reward: [(0, '27.476')] +[2024-09-22 15:57:29,358][02365] Updated weights for policy 0, policy_version 1690 (0.0033) +[2024-09-22 15:57:33,841][00338] Fps is (10 sec: 4504.9, 60 sec: 3891.1, 300 sec: 3859.9). Total num frames: 6938624. Throughput: 0: 1000.2. Samples: 1735076. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:57:33,845][00338] Avg episode reward: [(0, '25.619')] +[2024-09-22 15:57:38,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 6950912. Throughput: 0: 966.4. Samples: 1737052. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:57:38,841][00338] Avg episode reward: [(0, '25.420')] +[2024-09-22 15:57:41,123][02365] Updated weights for policy 0, policy_version 1700 (0.0035) +[2024-09-22 15:57:43,839][00338] Fps is (10 sec: 3686.9, 60 sec: 3891.3, 300 sec: 3860.0). Total num frames: 6975488. Throughput: 0: 938.9. Samples: 1742724. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:57:43,841][00338] Avg episode reward: [(0, '25.740')] +[2024-09-22 15:57:48,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 6995968. Throughput: 0: 994.0. Samples: 1749736. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 15:57:48,841][00338] Avg episode reward: [(0, '26.360')] +[2024-09-22 15:57:50,416][02365] Updated weights for policy 0, policy_version 1710 (0.0028) +[2024-09-22 15:57:53,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 7012352. Throughput: 0: 992.3. Samples: 1752216. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:57:53,842][00338] Avg episode reward: [(0, '28.036')] +[2024-09-22 15:57:58,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3823.4, 300 sec: 3846.1). Total num frames: 7032832. Throughput: 0: 943.8. Samples: 1756932. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:57:58,847][00338] Avg episode reward: [(0, '27.907')] +[2024-09-22 15:58:01,522][02365] Updated weights for policy 0, policy_version 1720 (0.0039) +[2024-09-22 15:58:03,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 7053312. Throughput: 0: 979.3. Samples: 1763936. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:58:03,846][00338] Avg episode reward: [(0, '28.365')] +[2024-09-22 15:58:08,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 7069696. Throughput: 0: 1007.5. Samples: 1767286. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:58:08,842][00338] Avg episode reward: [(0, '28.773')] +[2024-09-22 15:58:13,320][02365] Updated weights for policy 0, policy_version 1730 (0.0021) +[2024-09-22 15:58:13,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 7086080. Throughput: 0: 949.6. Samples: 1771314. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:58:13,842][00338] Avg episode reward: [(0, '29.301')] +[2024-09-22 15:58:18,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 7110656. Throughput: 0: 951.1. Samples: 1777874. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:58:18,842][00338] Avg episode reward: [(0, '28.024')] +[2024-09-22 15:58:22,026][02365] Updated weights for policy 0, policy_version 1740 (0.0023) +[2024-09-22 15:58:23,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 7131136. Throughput: 0: 985.0. Samples: 1781376. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 15:58:23,847][00338] Avg episode reward: [(0, '26.641')] +[2024-09-22 15:58:28,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 7147520. Throughput: 0: 971.2. Samples: 1786430. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:58:28,849][00338] Avg episode reward: [(0, '25.863')] +[2024-09-22 15:58:28,859][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001745_7147520.pth... +[2024-09-22 15:58:29,014][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001520_6225920.pth +[2024-09-22 15:58:33,763][02365] Updated weights for policy 0, policy_version 1750 (0.0050) +[2024-09-22 15:58:33,843][00338] Fps is (10 sec: 3686.2, 60 sec: 3823.0, 300 sec: 3860.0). Total num frames: 7168000. Throughput: 0: 940.3. Samples: 1792048. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:58:33,846][00338] Avg episode reward: [(0, '26.187')] +[2024-09-22 15:58:38,839][00338] Fps is (10 sec: 4095.9, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 7188480. Throughput: 0: 962.1. Samples: 1795512. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:58:38,843][00338] Avg episode reward: [(0, '26.532')] +[2024-09-22 15:58:43,839][00338] Fps is (10 sec: 3686.6, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 7204864. Throughput: 0: 986.6. Samples: 1801328. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:58:43,843][00338] Avg episode reward: [(0, '26.097')] +[2024-09-22 15:58:44,304][02365] Updated weights for policy 0, policy_version 1760 (0.0020) +[2024-09-22 15:58:48,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 7221248. Throughput: 0: 935.2. Samples: 1806020. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:58:48,842][00338] Avg episode reward: [(0, '26.692')] +[2024-09-22 15:58:53,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 7245824. Throughput: 0: 938.9. Samples: 1809538. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:58:53,842][00338] Avg episode reward: [(0, '26.199')] +[2024-09-22 15:58:54,200][02365] Updated weights for policy 0, policy_version 1770 (0.0044) +[2024-09-22 15:58:58,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 7266304. Throughput: 0: 1001.7. Samples: 1816392. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:58:58,842][00338] Avg episode reward: [(0, '27.868')] +[2024-09-22 15:59:03,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 7278592. Throughput: 0: 949.2. Samples: 1820590. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:59:03,846][00338] Avg episode reward: [(0, '26.893')] +[2024-09-22 15:59:05,650][02365] Updated weights for policy 0, policy_version 1780 (0.0035) +[2024-09-22 15:59:08,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 7303168. Throughput: 0: 945.0. Samples: 1823902. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:59:08,847][00338] Avg episode reward: [(0, '26.844')] +[2024-09-22 15:59:13,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 7323648. Throughput: 0: 982.6. Samples: 1830646. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:59:13,843][00338] Avg episode reward: [(0, '26.670')] +[2024-09-22 15:59:15,383][02365] Updated weights for policy 0, policy_version 1790 (0.0037) +[2024-09-22 15:59:18,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 7340032. Throughput: 0: 966.9. Samples: 1835558. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:59:18,846][00338] Avg episode reward: [(0, '26.069')] +[2024-09-22 15:59:23,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 7360512. Throughput: 0: 942.6. Samples: 1837930. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:59:23,846][00338] Avg episode reward: [(0, '26.158')] +[2024-09-22 15:59:26,456][02365] Updated weights for policy 0, policy_version 1800 (0.0024) +[2024-09-22 15:59:28,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 7380992. Throughput: 0: 965.0. Samples: 1844754. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 15:59:28,847][00338] Avg episode reward: [(0, '24.958')] +[2024-09-22 15:59:33,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3823.0, 300 sec: 3846.1). Total num frames: 7397376. Throughput: 0: 985.5. Samples: 1850368. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 15:59:33,845][00338] Avg episode reward: [(0, '25.940')] +[2024-09-22 15:59:38,019][02365] Updated weights for policy 0, policy_version 1810 (0.0025) +[2024-09-22 15:59:38,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 7413760. Throughput: 0: 952.3. Samples: 1852390. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 15:59:38,844][00338] Avg episode reward: [(0, '26.396')] +[2024-09-22 15:59:43,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 7438336. Throughput: 0: 941.9. Samples: 1858776. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 15:59:43,843][00338] Avg episode reward: [(0, '26.900')] +[2024-09-22 15:59:46,917][02365] Updated weights for policy 0, policy_version 1820 (0.0031) +[2024-09-22 15:59:48,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 7458816. Throughput: 0: 994.1. Samples: 1865326. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:59:48,841][00338] Avg episode reward: [(0, '24.309')] +[2024-09-22 15:59:53,841][00338] Fps is (10 sec: 3276.3, 60 sec: 3754.6, 300 sec: 3818.3). Total num frames: 7471104. Throughput: 0: 965.3. Samples: 1867342. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:59:53,843][00338] Avg episode reward: [(0, '25.954')] +[2024-09-22 15:59:58,598][02365] Updated weights for policy 0, policy_version 1830 (0.0036) +[2024-09-22 15:59:58,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 7495680. Throughput: 0: 941.7. Samples: 1873022. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 15:59:58,841][00338] Avg episode reward: [(0, '24.781')] +[2024-09-22 16:00:03,839][00338] Fps is (10 sec: 4915.9, 60 sec: 4027.7, 300 sec: 3873.8). Total num frames: 7520256. Throughput: 0: 986.9. Samples: 1879970. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 16:00:03,848][00338] Avg episode reward: [(0, '23.748')] +[2024-09-22 16:00:08,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 7532544. Throughput: 0: 992.0. Samples: 1882568. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:00:08,846][00338] Avg episode reward: [(0, '23.337')] +[2024-09-22 16:00:09,330][02365] Updated weights for policy 0, policy_version 1840 (0.0026) +[2024-09-22 16:00:13,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 7553024. Throughput: 0: 940.8. Samples: 1887092. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:00:13,849][00338] Avg episode reward: [(0, '24.276')] +[2024-09-22 16:00:18,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 7573504. Throughput: 0: 971.2. Samples: 1894072. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:00:18,848][00338] Avg episode reward: [(0, '26.464')] +[2024-09-22 16:00:19,012][02365] Updated weights for policy 0, policy_version 1850 (0.0035) +[2024-09-22 16:00:23,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 7593984. Throughput: 0: 1005.9. Samples: 1897656. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:00:23,842][00338] Avg episode reward: [(0, '27.119')] +[2024-09-22 16:00:28,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 7610368. Throughput: 0: 958.0. Samples: 1901886. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:00:28,842][00338] Avg episode reward: [(0, '28.117')] +[2024-09-22 16:00:28,855][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001858_7610368.pth... +[2024-09-22 16:00:28,972][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001632_6684672.pth +[2024-09-22 16:00:30,716][02365] Updated weights for policy 0, policy_version 1860 (0.0022) +[2024-09-22 16:00:33,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 7630848. Throughput: 0: 948.9. Samples: 1908026. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:00:33,841][00338] Avg episode reward: [(0, '29.046')] +[2024-09-22 16:00:38,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 7651328. Throughput: 0: 979.4. Samples: 1911414. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:00:38,846][00338] Avg episode reward: [(0, '28.734')] +[2024-09-22 16:00:40,911][02365] Updated weights for policy 0, policy_version 1870 (0.0042) +[2024-09-22 16:00:43,842][00338] Fps is (10 sec: 3685.5, 60 sec: 3822.8, 300 sec: 3846.0). Total num frames: 7667712. Throughput: 0: 965.2. Samples: 1916456. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:00:43,844][00338] Avg episode reward: [(0, '27.756')] +[2024-09-22 16:00:48,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 7684096. Throughput: 0: 930.1. Samples: 1921826. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:00:48,842][00338] Avg episode reward: [(0, '27.341')] +[2024-09-22 16:00:51,528][02365] Updated weights for policy 0, policy_version 1880 (0.0021) +[2024-09-22 16:00:53,839][00338] Fps is (10 sec: 4097.0, 60 sec: 3959.6, 300 sec: 3860.0). Total num frames: 7708672. Throughput: 0: 950.7. Samples: 1925350. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:00:53,842][00338] Avg episode reward: [(0, '27.338')] +[2024-09-22 16:00:58,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 7725056. Throughput: 0: 988.8. Samples: 1931590. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:00:58,842][00338] Avg episode reward: [(0, '26.127')] +[2024-09-22 16:01:02,986][02365] Updated weights for policy 0, policy_version 1890 (0.0040) +[2024-09-22 16:01:03,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3832.2). Total num frames: 7741440. Throughput: 0: 935.0. Samples: 1936146. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:01:03,842][00338] Avg episode reward: [(0, '26.284')] +[2024-09-22 16:01:08,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3873.9). Total num frames: 7766016. Throughput: 0: 933.5. Samples: 1939662. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:01:08,849][00338] Avg episode reward: [(0, '27.875')] +[2024-09-22 16:01:12,077][02365] Updated weights for policy 0, policy_version 1900 (0.0020) +[2024-09-22 16:01:13,843][00338] Fps is (10 sec: 4503.9, 60 sec: 3891.0, 300 sec: 3859.9). Total num frames: 7786496. Throughput: 0: 987.9. Samples: 1946346. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:01:13,847][00338] Avg episode reward: [(0, '29.957')] +[2024-09-22 16:01:18,841][00338] Fps is (10 sec: 3276.3, 60 sec: 3754.6, 300 sec: 3832.2). Total num frames: 7798784. Throughput: 0: 940.9. Samples: 1950370. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:01:18,843][00338] Avg episode reward: [(0, '29.873')] +[2024-09-22 16:01:23,840][00338] Fps is (10 sec: 3277.9, 60 sec: 3754.6, 300 sec: 3846.1). Total num frames: 7819264. Throughput: 0: 930.1. Samples: 1953270. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 16:01:23,845][00338] Avg episode reward: [(0, '31.117')] +[2024-09-22 16:01:24,101][02365] Updated weights for policy 0, policy_version 1910 (0.0034) +[2024-09-22 16:01:28,839][00338] Fps is (10 sec: 4506.2, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 7843840. Throughput: 0: 968.5. Samples: 1960036. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:01:28,848][00338] Avg episode reward: [(0, '30.706')] +[2024-09-22 16:01:33,839][00338] Fps is (10 sec: 4096.1, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 7860224. Throughput: 0: 963.3. Samples: 1965174. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 16:01:33,844][00338] Avg episode reward: [(0, '32.814')] +[2024-09-22 16:01:35,411][02365] Updated weights for policy 0, policy_version 1920 (0.0031) +[2024-09-22 16:01:38,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 7876608. Throughput: 0: 928.9. Samples: 1967152. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 16:01:38,842][00338] Avg episode reward: [(0, '32.829')] +[2024-09-22 16:01:43,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3823.1, 300 sec: 3860.0). Total num frames: 7897088. Throughput: 0: 938.3. Samples: 1973812. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 16:01:43,842][00338] Avg episode reward: [(0, '32.362')] +[2024-09-22 16:01:44,924][02365] Updated weights for policy 0, policy_version 1930 (0.0022) +[2024-09-22 16:01:48,840][00338] Fps is (10 sec: 4095.8, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 7917568. Throughput: 0: 973.1. Samples: 1979936. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-09-22 16:01:48,842][00338] Avg episode reward: [(0, '30.926')] +[2024-09-22 16:01:53,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3832.3). Total num frames: 7933952. Throughput: 0: 938.8. Samples: 1981906. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 16:01:53,842][00338] Avg episode reward: [(0, '31.178')] +[2024-09-22 16:01:56,430][02365] Updated weights for policy 0, policy_version 1940 (0.0033) +[2024-09-22 16:01:58,839][00338] Fps is (10 sec: 3686.6, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 7954432. Throughput: 0: 924.6. Samples: 1987950. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:01:58,842][00338] Avg episode reward: [(0, '31.557')] +[2024-09-22 16:02:03,843][00338] Fps is (10 sec: 4094.6, 60 sec: 3891.0, 300 sec: 3859.9). Total num frames: 7974912. Throughput: 0: 979.4. Samples: 1994444. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 16:02:03,847][00338] Avg episode reward: [(0, '33.452')] +[2024-09-22 16:02:03,852][02352] Saving new best policy, reward=33.452! +[2024-09-22 16:02:07,560][02365] Updated weights for policy 0, policy_version 1950 (0.0021) +[2024-09-22 16:02:08,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3818.3). Total num frames: 7987200. Throughput: 0: 956.4. Samples: 1996308. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 16:02:08,847][00338] Avg episode reward: [(0, '32.377')] +[2024-09-22 16:02:13,840][00338] Fps is (10 sec: 3277.8, 60 sec: 3686.6, 300 sec: 3832.2). Total num frames: 8007680. Throughput: 0: 917.0. Samples: 2001300. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 16:02:13,842][00338] Avg episode reward: [(0, '31.473')] +[2024-09-22 16:02:17,914][02365] Updated weights for policy 0, policy_version 1960 (0.0031) +[2024-09-22 16:02:18,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3891.3, 300 sec: 3860.0). Total num frames: 8032256. Throughput: 0: 952.4. Samples: 2008032. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:02:18,843][00338] Avg episode reward: [(0, '31.516')] +[2024-09-22 16:02:23,839][00338] Fps is (10 sec: 3686.5, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 8044544. Throughput: 0: 972.7. Samples: 2010924. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 16:02:23,844][00338] Avg episode reward: [(0, '32.256')] +[2024-09-22 16:02:28,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3818.3). Total num frames: 8065024. Throughput: 0: 924.5. Samples: 2015414. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 16:02:28,844][00338] Avg episode reward: [(0, '31.379')] +[2024-09-22 16:02:28,853][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001969_8065024.pth... +[2024-09-22 16:02:28,979][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001745_7147520.pth +[2024-09-22 16:02:29,428][02365] Updated weights for policy 0, policy_version 1970 (0.0027) +[2024-09-22 16:02:33,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 8089600. Throughput: 0: 940.5. Samples: 2022256. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:02:33,844][00338] Avg episode reward: [(0, '29.941')] +[2024-09-22 16:02:38,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 8105984. Throughput: 0: 972.6. Samples: 2025672. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:02:38,847][00338] Avg episode reward: [(0, '29.904')] +[2024-09-22 16:02:39,179][02365] Updated weights for policy 0, policy_version 1980 (0.0028) +[2024-09-22 16:02:43,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 8122368. Throughput: 0: 936.2. Samples: 2030078. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:02:43,845][00338] Avg episode reward: [(0, '30.342')] +[2024-09-22 16:02:48,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 8142848. Throughput: 0: 930.4. Samples: 2036310. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:02:48,848][00338] Avg episode reward: [(0, '29.656')] +[2024-09-22 16:02:50,024][02365] Updated weights for policy 0, policy_version 1990 (0.0025) +[2024-09-22 16:02:53,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 8167424. Throughput: 0: 966.8. Samples: 2039814. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:02:53,848][00338] Avg episode reward: [(0, '28.966')] +[2024-09-22 16:02:58,840][00338] Fps is (10 sec: 3686.0, 60 sec: 3754.6, 300 sec: 3818.3). Total num frames: 8179712. Throughput: 0: 975.6. Samples: 2045202. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:02:58,848][00338] Avg episode reward: [(0, '29.710')] +[2024-09-22 16:03:01,537][02365] Updated weights for policy 0, policy_version 2000 (0.0026) +[2024-09-22 16:03:03,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.9, 300 sec: 3832.2). Total num frames: 8200192. Throughput: 0: 947.2. Samples: 2050654. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:03:03,842][00338] Avg episode reward: [(0, '28.883')] +[2024-09-22 16:03:08,839][00338] Fps is (10 sec: 4506.1, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 8224768. Throughput: 0: 959.8. Samples: 2054116. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:03:08,842][00338] Avg episode reward: [(0, '27.991')] +[2024-09-22 16:03:10,311][02365] Updated weights for policy 0, policy_version 2010 (0.0032) +[2024-09-22 16:03:13,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 8241152. Throughput: 0: 995.7. Samples: 2060222. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:03:13,842][00338] Avg episode reward: [(0, '26.356')] +[2024-09-22 16:03:18,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 8257536. Throughput: 0: 944.9. Samples: 2064778. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:03:18,842][00338] Avg episode reward: [(0, '28.131')] +[2024-09-22 16:03:21,931][02365] Updated weights for policy 0, policy_version 2020 (0.0044) +[2024-09-22 16:03:23,840][00338] Fps is (10 sec: 4095.9, 60 sec: 3959.4, 300 sec: 3846.1). Total num frames: 8282112. Throughput: 0: 946.6. Samples: 2068268. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 16:03:23,841][00338] Avg episode reward: [(0, '24.775')] +[2024-09-22 16:03:28,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 8302592. Throughput: 0: 1005.2. Samples: 2075314. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 16:03:28,846][00338] Avg episode reward: [(0, '24.982')] +[2024-09-22 16:03:32,489][02365] Updated weights for policy 0, policy_version 2030 (0.0013) +[2024-09-22 16:03:33,839][00338] Fps is (10 sec: 3276.9, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 8314880. Throughput: 0: 964.2. Samples: 2079698. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:03:33,844][00338] Avg episode reward: [(0, '25.855')] +[2024-09-22 16:03:38,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 8339456. Throughput: 0: 950.8. Samples: 2082600. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 16:03:38,850][00338] Avg episode reward: [(0, '26.736')] +[2024-09-22 16:03:42,296][02365] Updated weights for policy 0, policy_version 2040 (0.0021) +[2024-09-22 16:03:43,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 8359936. Throughput: 0: 985.4. Samples: 2089546. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 16:03:43,842][00338] Avg episode reward: [(0, '26.007')] +[2024-09-22 16:03:48,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 8376320. Throughput: 0: 978.4. Samples: 2094680. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-09-22 16:03:48,842][00338] Avg episode reward: [(0, '26.375')] +[2024-09-22 16:03:53,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 8392704. Throughput: 0: 948.8. Samples: 2096814. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 16:03:53,842][00338] Avg episode reward: [(0, '26.601')] +[2024-09-22 16:03:53,862][02365] Updated weights for policy 0, policy_version 2050 (0.0022) +[2024-09-22 16:03:58,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 8417280. Throughput: 0: 966.2. Samples: 2103702. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-09-22 16:03:58,842][00338] Avg episode reward: [(0, '27.059')] +[2024-09-22 16:04:03,396][02365] Updated weights for policy 0, policy_version 2060 (0.0040) +[2024-09-22 16:04:03,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 8437760. Throughput: 0: 1002.0. Samples: 2109866. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-09-22 16:04:03,842][00338] Avg episode reward: [(0, '26.839')] +[2024-09-22 16:04:08,840][00338] Fps is (10 sec: 3686.2, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 8454144. Throughput: 0: 970.0. Samples: 2111916. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:04:08,846][00338] Avg episode reward: [(0, '28.296')] +[2024-09-22 16:04:13,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 8474624. Throughput: 0: 944.4. Samples: 2117810. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:04:13,843][00338] Avg episode reward: [(0, '29.946')] +[2024-09-22 16:04:14,383][02365] Updated weights for policy 0, policy_version 2070 (0.0017) +[2024-09-22 16:04:18,841][00338] Fps is (10 sec: 4505.2, 60 sec: 4027.6, 300 sec: 3859.9). Total num frames: 8499200. Throughput: 0: 1003.0. Samples: 2124836. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:04:18,850][00338] Avg episode reward: [(0, '30.634')] +[2024-09-22 16:04:23,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3823.0, 300 sec: 3832.2). Total num frames: 8511488. Throughput: 0: 985.2. Samples: 2126934. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 16:04:23,842][00338] Avg episode reward: [(0, '31.482')] +[2024-09-22 16:04:25,912][02365] Updated weights for policy 0, policy_version 2080 (0.0035) +[2024-09-22 16:04:28,839][00338] Fps is (10 sec: 3277.3, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 8531968. Throughput: 0: 946.5. Samples: 2132138. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:04:28,842][00338] Avg episode reward: [(0, '31.025')] +[2024-09-22 16:04:28,858][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002083_8531968.pth... +[2024-09-22 16:04:29,005][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001858_7610368.pth +[2024-09-22 16:04:33,840][00338] Fps is (10 sec: 4095.9, 60 sec: 3959.4, 300 sec: 3860.0). Total num frames: 8552448. Throughput: 0: 985.5. Samples: 2139030. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:04:33,842][00338] Avg episode reward: [(0, '29.630')] +[2024-09-22 16:04:34,840][02365] Updated weights for policy 0, policy_version 2090 (0.0028) +[2024-09-22 16:04:38,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 8568832. Throughput: 0: 1002.4. Samples: 2141924. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-09-22 16:04:38,842][00338] Avg episode reward: [(0, '27.281')] +[2024-09-22 16:04:43,839][00338] Fps is (10 sec: 3276.9, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 8585216. Throughput: 0: 939.2. Samples: 2145966. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 16:04:43,841][00338] Avg episode reward: [(0, '25.791')] +[2024-09-22 16:04:46,755][02365] Updated weights for policy 0, policy_version 2100 (0.0040) +[2024-09-22 16:04:48,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 8609792. Throughput: 0: 953.7. Samples: 2152782. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:04:48,848][00338] Avg episode reward: [(0, '25.506')] +[2024-09-22 16:04:53,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 8630272. Throughput: 0: 983.5. Samples: 2156172. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:04:53,846][00338] Avg episode reward: [(0, '25.359')] +[2024-09-22 16:04:57,980][02365] Updated weights for policy 0, policy_version 2110 (0.0019) +[2024-09-22 16:04:58,843][00338] Fps is (10 sec: 3275.6, 60 sec: 3754.4, 300 sec: 3804.4). Total num frames: 8642560. Throughput: 0: 953.3. Samples: 2160714. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:04:58,845][00338] Avg episode reward: [(0, '25.176')] +[2024-09-22 16:05:03,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 8663040. Throughput: 0: 929.1. Samples: 2166644. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:05:03,848][00338] Avg episode reward: [(0, '24.972')] +[2024-09-22 16:05:07,348][02365] Updated weights for policy 0, policy_version 2120 (0.0013) +[2024-09-22 16:05:08,839][00338] Fps is (10 sec: 4507.2, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 8687616. Throughput: 0: 960.3. Samples: 2170146. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 16:05:08,846][00338] Avg episode reward: [(0, '26.652')] +[2024-09-22 16:05:13,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 8704000. Throughput: 0: 962.7. Samples: 2175460. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 16:05:13,844][00338] Avg episode reward: [(0, '27.498')] +[2024-09-22 16:05:18,840][00338] Fps is (10 sec: 3276.7, 60 sec: 3686.5, 300 sec: 3818.3). Total num frames: 8720384. Throughput: 0: 919.7. Samples: 2180416. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:05:18,841][00338] Avg episode reward: [(0, '27.897')] +[2024-09-22 16:05:19,389][02365] Updated weights for policy 0, policy_version 2130 (0.0022) +[2024-09-22 16:05:23,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 8740864. Throughput: 0: 928.8. Samples: 2183720. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:05:23,841][00338] Avg episode reward: [(0, '27.871')] +[2024-09-22 16:05:28,843][00338] Fps is (10 sec: 4094.4, 60 sec: 3822.7, 300 sec: 3832.1). Total num frames: 8761344. Throughput: 0: 976.3. Samples: 2189904. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:05:28,846][00338] Avg episode reward: [(0, '30.535')] +[2024-09-22 16:05:30,287][02365] Updated weights for policy 0, policy_version 2140 (0.0022) +[2024-09-22 16:05:33,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3804.4). Total num frames: 8773632. Throughput: 0: 916.9. Samples: 2194042. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 16:05:33,849][00338] Avg episode reward: [(0, '31.388')] +[2024-09-22 16:05:38,839][00338] Fps is (10 sec: 3687.9, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 8798208. Throughput: 0: 916.9. Samples: 2197432. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 16:05:38,844][00338] Avg episode reward: [(0, '30.914')] +[2024-09-22 16:05:40,492][02365] Updated weights for policy 0, policy_version 2150 (0.0027) +[2024-09-22 16:05:43,839][00338] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 8818688. Throughput: 0: 961.0. Samples: 2203954. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:05:43,849][00338] Avg episode reward: [(0, '29.576')] +[2024-09-22 16:05:48,839][00338] Fps is (10 sec: 3276.7, 60 sec: 3686.4, 300 sec: 3804.4). Total num frames: 8830976. Throughput: 0: 924.8. Samples: 2208262. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:05:48,842][00338] Avg episode reward: [(0, '29.672')] +[2024-09-22 16:05:52,737][02365] Updated weights for policy 0, policy_version 2160 (0.0047) +[2024-09-22 16:05:53,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3818.3). Total num frames: 8851456. Throughput: 0: 904.8. Samples: 2210860. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:05:53,846][00338] Avg episode reward: [(0, '29.520')] +[2024-09-22 16:05:58,839][00338] Fps is (10 sec: 4096.1, 60 sec: 3823.2, 300 sec: 3832.2). Total num frames: 8871936. Throughput: 0: 932.7. Samples: 2217430. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:05:58,842][00338] Avg episode reward: [(0, '29.407')] +[2024-09-22 16:06:03,385][02365] Updated weights for policy 0, policy_version 2170 (0.0025) +[2024-09-22 16:06:03,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3804.4). Total num frames: 8888320. Throughput: 0: 936.4. Samples: 2222556. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:06:03,847][00338] Avg episode reward: [(0, '28.083')] +[2024-09-22 16:06:08,840][00338] Fps is (10 sec: 3276.7, 60 sec: 3618.1, 300 sec: 3790.6). Total num frames: 8904704. Throughput: 0: 908.6. Samples: 2224606. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 16:06:08,848][00338] Avg episode reward: [(0, '28.735')] +[2024-09-22 16:06:13,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3818.3). Total num frames: 8925184. Throughput: 0: 912.3. Samples: 2230954. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:06:13,841][00338] Avg episode reward: [(0, '30.509')] +[2024-09-22 16:06:14,081][02365] Updated weights for policy 0, policy_version 2180 (0.0021) +[2024-09-22 16:06:18,839][00338] Fps is (10 sec: 4096.1, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 8945664. Throughput: 0: 948.4. Samples: 2236718. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:06:18,848][00338] Avg episode reward: [(0, '31.683')] +[2024-09-22 16:06:23,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3776.7). Total num frames: 8957952. Throughput: 0: 917.2. Samples: 2238708. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:06:23,842][00338] Avg episode reward: [(0, '31.514')] +[2024-09-22 16:06:26,197][02365] Updated weights for policy 0, policy_version 2190 (0.0024) +[2024-09-22 16:06:28,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3618.4, 300 sec: 3790.5). Total num frames: 8978432. Throughput: 0: 898.8. Samples: 2244398. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 16:06:28,843][00338] Avg episode reward: [(0, '31.397')] +[2024-09-22 16:06:28,856][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002193_8982528.pth... +[2024-09-22 16:06:28,981][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001969_8065024.pth +[2024-09-22 16:06:33,841][00338] Fps is (10 sec: 4505.0, 60 sec: 3822.8, 300 sec: 3818.3). Total num frames: 9003008. Throughput: 0: 952.2. Samples: 2251110. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:06:33,845][00338] Avg episode reward: [(0, '30.621')] +[2024-09-22 16:06:36,287][02365] Updated weights for policy 0, policy_version 2200 (0.0031) +[2024-09-22 16:06:38,840][00338] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3790.5). Total num frames: 9015296. Throughput: 0: 944.1. Samples: 2253344. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 16:06:38,847][00338] Avg episode reward: [(0, '31.297')] +[2024-09-22 16:06:43,839][00338] Fps is (10 sec: 3277.3, 60 sec: 3618.1, 300 sec: 3790.5). Total num frames: 9035776. Throughput: 0: 901.3. Samples: 2257990. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:06:43,842][00338] Avg episode reward: [(0, '31.713')] +[2024-09-22 16:06:47,439][02365] Updated weights for policy 0, policy_version 2210 (0.0020) +[2024-09-22 16:06:48,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3804.4). Total num frames: 9056256. Throughput: 0: 932.0. Samples: 2264496. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 16:06:48,847][00338] Avg episode reward: [(0, '31.115')] +[2024-09-22 16:06:53,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3790.5). Total num frames: 9072640. Throughput: 0: 955.9. Samples: 2267622. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:06:53,844][00338] Avg episode reward: [(0, '31.876')] +[2024-09-22 16:06:58,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3776.7). Total num frames: 9089024. Throughput: 0: 903.8. Samples: 2271624. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:06:58,842][00338] Avg episode reward: [(0, '31.079')] +[2024-09-22 16:06:59,598][02365] Updated weights for policy 0, policy_version 2220 (0.0033) +[2024-09-22 16:07:03,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3804.4). Total num frames: 9109504. Throughput: 0: 916.0. Samples: 2277940. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:07:03,847][00338] Avg episode reward: [(0, '29.828')] +[2024-09-22 16:07:08,841][00338] Fps is (10 sec: 4095.3, 60 sec: 3754.6, 300 sec: 3804.4). Total num frames: 9129984. Throughput: 0: 942.8. Samples: 2281134. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:07:08,844][00338] Avg episode reward: [(0, '28.222')] +[2024-09-22 16:07:09,524][02365] Updated weights for policy 0, policy_version 2230 (0.0021) +[2024-09-22 16:07:13,846][00338] Fps is (10 sec: 3274.7, 60 sec: 3617.7, 300 sec: 3762.7). Total num frames: 9142272. Throughput: 0: 920.0. Samples: 2285806. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:07:13,861][00338] Avg episode reward: [(0, '27.128')] +[2024-09-22 16:07:18,839][00338] Fps is (10 sec: 3277.4, 60 sec: 3618.1, 300 sec: 3790.5). Total num frames: 9162752. Throughput: 0: 886.2. Samples: 2290986. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:07:18,847][00338] Avg episode reward: [(0, '25.770')] +[2024-09-22 16:07:21,320][02365] Updated weights for policy 0, policy_version 2240 (0.0021) +[2024-09-22 16:07:23,839][00338] Fps is (10 sec: 4098.6, 60 sec: 3754.7, 300 sec: 3790.5). Total num frames: 9183232. Throughput: 0: 907.7. Samples: 2294192. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:07:23,842][00338] Avg episode reward: [(0, '26.569')] +[2024-09-22 16:07:28,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 9199616. Throughput: 0: 928.7. Samples: 2299780. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:07:28,842][00338] Avg episode reward: [(0, '27.256')] +[2024-09-22 16:07:33,670][02365] Updated weights for policy 0, policy_version 2250 (0.0038) +[2024-09-22 16:07:33,840][00338] Fps is (10 sec: 3276.7, 60 sec: 3549.9, 300 sec: 3762.8). Total num frames: 9216000. Throughput: 0: 877.8. Samples: 2303996. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 16:07:33,848][00338] Avg episode reward: [(0, '26.881')] +[2024-09-22 16:07:38,840][00338] Fps is (10 sec: 3686.3, 60 sec: 3686.4, 300 sec: 3776.6). Total num frames: 9236480. Throughput: 0: 879.3. Samples: 2307192. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:07:38,848][00338] Avg episode reward: [(0, '28.506')] +[2024-09-22 16:07:43,839][00338] Fps is (10 sec: 3686.5, 60 sec: 3618.1, 300 sec: 3762.8). Total num frames: 9252864. Throughput: 0: 936.6. Samples: 2313772. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:07:43,843][00338] Avg episode reward: [(0, '28.727')] +[2024-09-22 16:07:43,855][02365] Updated weights for policy 0, policy_version 2260 (0.0020) +[2024-09-22 16:07:48,839][00338] Fps is (10 sec: 3276.9, 60 sec: 3549.9, 300 sec: 3735.0). Total num frames: 9269248. Throughput: 0: 881.6. Samples: 2317612. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:07:48,843][00338] Avg episode reward: [(0, '29.467')] +[2024-09-22 16:07:53,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3748.9). Total num frames: 9285632. Throughput: 0: 868.5. Samples: 2320214. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 16:07:53,842][00338] Avg episode reward: [(0, '29.190')] +[2024-09-22 16:07:55,712][02365] Updated weights for policy 0, policy_version 2270 (0.0027) +[2024-09-22 16:07:58,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 9310208. Throughput: 0: 913.0. Samples: 2326884. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:07:58,847][00338] Avg episode reward: [(0, '28.779')] +[2024-09-22 16:08:03,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3735.0). Total num frames: 9326592. Throughput: 0: 911.6. Samples: 2332008. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:08:03,845][00338] Avg episode reward: [(0, '27.635')] +[2024-09-22 16:08:07,591][02365] Updated weights for policy 0, policy_version 2280 (0.0017) +[2024-09-22 16:08:08,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3550.0, 300 sec: 3735.0). Total num frames: 9342976. Throughput: 0: 884.4. Samples: 2333992. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 16:08:08,847][00338] Avg episode reward: [(0, '28.898')] +[2024-09-22 16:08:13,839][00338] Fps is (10 sec: 3686.3, 60 sec: 3686.8, 300 sec: 3748.9). Total num frames: 9363456. Throughput: 0: 904.8. Samples: 2340498. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:08:13,842][00338] Avg episode reward: [(0, '28.098')] +[2024-09-22 16:08:16,761][02365] Updated weights for policy 0, policy_version 2290 (0.0048) +[2024-09-22 16:08:18,840][00338] Fps is (10 sec: 4095.7, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 9383936. Throughput: 0: 946.6. Samples: 2346594. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:08:18,846][00338] Avg episode reward: [(0, '28.231')] +[2024-09-22 16:08:23,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3707.2). Total num frames: 9396224. Throughput: 0: 920.4. Samples: 2348608. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 16:08:23,843][00338] Avg episode reward: [(0, '27.925')] +[2024-09-22 16:08:28,571][02365] Updated weights for policy 0, policy_version 2300 (0.0030) +[2024-09-22 16:08:28,840][00338] Fps is (10 sec: 3686.6, 60 sec: 3686.4, 300 sec: 3748.9). Total num frames: 9420800. Throughput: 0: 899.7. Samples: 2354258. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:08:28,843][00338] Avg episode reward: [(0, '28.819')] +[2024-09-22 16:08:28,851][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002300_9420800.pth... +[2024-09-22 16:08:28,967][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002083_8531968.pth +[2024-09-22 16:08:33,840][00338] Fps is (10 sec: 4505.3, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 9441280. Throughput: 0: 967.0. Samples: 2361128. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:08:33,842][00338] Avg episode reward: [(0, '30.432')] +[2024-09-22 16:08:38,840][00338] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 9457664. Throughput: 0: 959.9. Samples: 2363410. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:08:38,841][00338] Avg episode reward: [(0, '30.146')] +[2024-09-22 16:08:40,193][02365] Updated weights for policy 0, policy_version 2310 (0.0043) +[2024-09-22 16:08:43,839][00338] Fps is (10 sec: 3277.0, 60 sec: 3686.4, 300 sec: 3721.1). Total num frames: 9474048. Throughput: 0: 913.4. Samples: 2367986. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:08:43,843][00338] Avg episode reward: [(0, '29.738')] +[2024-09-22 16:08:48,839][00338] Fps is (10 sec: 3686.5, 60 sec: 3754.7, 300 sec: 3735.0). Total num frames: 9494528. Throughput: 0: 939.6. Samples: 2374290. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:08:48,842][00338] Avg episode reward: [(0, '28.896')] +[2024-09-22 16:08:50,113][02365] Updated weights for policy 0, policy_version 2320 (0.0018) +[2024-09-22 16:08:53,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 9510912. Throughput: 0: 963.8. Samples: 2377364. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:08:53,843][00338] Avg episode reward: [(0, '29.110')] +[2024-09-22 16:08:58,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3693.3). Total num frames: 9527296. Throughput: 0: 906.8. Samples: 2381306. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:08:58,842][00338] Avg episode reward: [(0, '28.651')] +[2024-09-22 16:09:02,405][02365] Updated weights for policy 0, policy_version 2330 (0.0022) +[2024-09-22 16:09:03,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3707.2). Total num frames: 9547776. Throughput: 0: 906.5. Samples: 2387388. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:09:03,842][00338] Avg episode reward: [(0, '28.253')] +[2024-09-22 16:09:08,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3707.2). Total num frames: 9568256. Throughput: 0: 932.6. Samples: 2390574. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:09:08,842][00338] Avg episode reward: [(0, '29.159')] +[2024-09-22 16:09:13,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 9580544. Throughput: 0: 909.6. Samples: 2395192. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 16:09:13,842][00338] Avg episode reward: [(0, '29.965')] +[2024-09-22 16:09:14,846][02365] Updated weights for policy 0, policy_version 2340 (0.0013) +[2024-09-22 16:09:18,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3618.2, 300 sec: 3693.3). Total num frames: 9601024. Throughput: 0: 869.6. Samples: 2400258. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:09:18,841][00338] Avg episode reward: [(0, '29.912')] +[2024-09-22 16:09:23,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 9621504. Throughput: 0: 892.4. Samples: 2403566. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:09:23,846][00338] Avg episode reward: [(0, '29.932')] +[2024-09-22 16:09:24,308][02365] Updated weights for policy 0, policy_version 2350 (0.0018) +[2024-09-22 16:09:28,839][00338] Fps is (10 sec: 3686.3, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 9637888. Throughput: 0: 915.9. Samples: 2409202. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:09:28,847][00338] Avg episode reward: [(0, '30.072')] +[2024-09-22 16:09:33,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3679.5). Total num frames: 9654272. Throughput: 0: 876.9. Samples: 2413750. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:09:33,841][00338] Avg episode reward: [(0, '29.140')] +[2024-09-22 16:09:36,416][02365] Updated weights for policy 0, policy_version 2360 (0.0023) +[2024-09-22 16:09:38,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3618.2, 300 sec: 3693.3). Total num frames: 9674752. Throughput: 0: 882.6. Samples: 2417082. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:09:38,842][00338] Avg episode reward: [(0, '29.453')] +[2024-09-22 16:09:43,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 9695232. Throughput: 0: 942.7. Samples: 2423726. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:09:43,846][00338] Avg episode reward: [(0, '29.201')] +[2024-09-22 16:09:47,760][02365] Updated weights for policy 0, policy_version 2370 (0.0024) +[2024-09-22 16:09:48,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 9707520. Throughput: 0: 896.6. Samples: 2427736. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:09:48,847][00338] Avg episode reward: [(0, '28.028')] +[2024-09-22 16:09:53,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3679.5). Total num frames: 9728000. Throughput: 0: 887.6. Samples: 2430514. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:09:53,846][00338] Avg episode reward: [(0, '28.608')] +[2024-09-22 16:09:57,547][02365] Updated weights for policy 0, policy_version 2380 (0.0013) +[2024-09-22 16:09:58,840][00338] Fps is (10 sec: 4505.5, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 9752576. Throughput: 0: 937.1. Samples: 2437360. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:09:58,842][00338] Avg episode reward: [(0, '29.160')] +[2024-09-22 16:10:03,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3665.6). Total num frames: 9768960. Throughput: 0: 937.6. Samples: 2442450. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:10:03,846][00338] Avg episode reward: [(0, '29.427')] +[2024-09-22 16:10:08,839][00338] Fps is (10 sec: 3276.9, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 9785344. Throughput: 0: 910.3. Samples: 2444530. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:10:08,845][00338] Avg episode reward: [(0, '29.770')] +[2024-09-22 16:10:09,350][02365] Updated weights for policy 0, policy_version 2390 (0.0015) +[2024-09-22 16:10:13,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 9805824. Throughput: 0: 934.4. Samples: 2451252. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:10:13,847][00338] Avg episode reward: [(0, '29.038')] +[2024-09-22 16:10:18,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3679.5). Total num frames: 9826304. Throughput: 0: 962.8. Samples: 2457078. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-09-22 16:10:18,842][00338] Avg episode reward: [(0, '29.128')] +[2024-09-22 16:10:20,068][02365] Updated weights for policy 0, policy_version 2400 (0.0036) +[2024-09-22 16:10:23,840][00338] Fps is (10 sec: 3276.7, 60 sec: 3618.1, 300 sec: 3651.7). Total num frames: 9838592. Throughput: 0: 932.2. Samples: 2459032. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 16:10:23,846][00338] Avg episode reward: [(0, '29.717')] +[2024-09-22 16:10:28,839][00338] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3693.3). Total num frames: 9863168. Throughput: 0: 908.7. Samples: 2464616. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 16:10:28,842][00338] Avg episode reward: [(0, '30.508')] +[2024-09-22 16:10:28,855][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002408_9863168.pth... +[2024-09-22 16:10:29,010][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002193_8982528.pth +[2024-09-22 16:10:30,745][02365] Updated weights for policy 0, policy_version 2410 (0.0034) +[2024-09-22 16:10:33,842][00338] Fps is (10 sec: 4504.7, 60 sec: 3822.8, 300 sec: 3679.4). Total num frames: 9883648. Throughput: 0: 963.3. Samples: 2471086. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:10:33,844][00338] Avg episode reward: [(0, '30.941')] +[2024-09-22 16:10:38,839][00338] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 9895936. Throughput: 0: 946.9. Samples: 2473124. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 16:10:38,842][00338] Avg episode reward: [(0, '31.595')] +[2024-09-22 16:10:42,852][02365] Updated weights for policy 0, policy_version 2420 (0.0032) +[2024-09-22 16:10:43,840][00338] Fps is (10 sec: 2867.8, 60 sec: 3618.1, 300 sec: 3665.6). Total num frames: 9912320. Throughput: 0: 898.5. Samples: 2477792. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 16:10:43,847][00338] Avg episode reward: [(0, '31.141')] +[2024-09-22 16:10:48,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3679.5). Total num frames: 9936896. Throughput: 0: 927.9. Samples: 2484204. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 16:10:48,843][00338] Avg episode reward: [(0, '31.017')] +[2024-09-22 16:10:53,839][00338] Fps is (10 sec: 3686.5, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 9949184. Throughput: 0: 944.1. Samples: 2487016. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:10:53,842][00338] Avg episode reward: [(0, '31.196')] +[2024-09-22 16:10:53,964][02365] Updated weights for policy 0, policy_version 2430 (0.0034) +[2024-09-22 16:10:58,839][00338] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3651.7). Total num frames: 9965568. Throughput: 0: 881.8. Samples: 2490934. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 16:10:58,848][00338] Avg episode reward: [(0, '31.272')] +[2024-09-22 16:11:03,839][00338] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 9990144. Throughput: 0: 898.2. Samples: 2497498. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 16:11:03,844][00338] Avg episode reward: [(0, '30.257')] +[2024-09-22 16:11:04,697][02365] Updated weights for policy 0, policy_version 2440 (0.0024) +[2024-09-22 16:11:07,458][02352] Stopping Batcher_0... +[2024-09-22 16:11:07,459][02352] Loop batcher_evt_loop terminating... +[2024-09-22 16:11:07,460][00338] Component Batcher_0 stopped! +[2024-09-22 16:11:07,465][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth... +[2024-09-22 16:11:07,535][02365] Weights refcount: 2 0 +[2024-09-22 16:11:07,537][02365] Stopping InferenceWorker_p0-w0... +[2024-09-22 16:11:07,538][02365] Loop inference_proc0-0_evt_loop terminating... +[2024-09-22 16:11:07,538][00338] Component InferenceWorker_p0-w0 stopped! +[2024-09-22 16:11:07,682][02352] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002300_9420800.pth +[2024-09-22 16:11:07,709][02352] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth... +[2024-09-22 16:11:07,952][02352] Stopping LearnerWorker_p0... +[2024-09-22 16:11:07,955][02352] Loop learner_proc0_evt_loop terminating... +[2024-09-22 16:11:07,954][00338] Component LearnerWorker_p0 stopped! +[2024-09-22 16:11:08,198][02373] Stopping RolloutWorker_w7... +[2024-09-22 16:11:08,198][02373] Loop rollout_proc7_evt_loop terminating... +[2024-09-22 16:11:08,193][00338] Component RolloutWorker_w7 stopped! +[2024-09-22 16:11:08,220][02367] Stopping RolloutWorker_w1... +[2024-09-22 16:11:08,220][00338] Component RolloutWorker_w1 stopped! +[2024-09-22 16:11:08,221][02367] Loop rollout_proc1_evt_loop terminating... +[2024-09-22 16:11:08,234][02370] Stopping RolloutWorker_w5... +[2024-09-22 16:11:08,243][00338] Component RolloutWorker_w5 stopped! +[2024-09-22 16:11:08,252][02369] Stopping RolloutWorker_w3... +[2024-09-22 16:11:08,252][00338] Component RolloutWorker_w3 stopped! +[2024-09-22 16:11:08,235][02370] Loop rollout_proc5_evt_loop terminating... +[2024-09-22 16:11:08,253][02369] Loop rollout_proc3_evt_loop terminating... +[2024-09-22 16:11:08,349][00338] Component RolloutWorker_w4 stopped! +[2024-09-22 16:11:08,352][02371] Stopping RolloutWorker_w4... +[2024-09-22 16:11:08,352][02371] Loop rollout_proc4_evt_loop terminating... +[2024-09-22 16:11:08,433][02372] Stopping RolloutWorker_w6... +[2024-09-22 16:11:08,434][02372] Loop rollout_proc6_evt_loop terminating... +[2024-09-22 16:11:08,433][00338] Component RolloutWorker_w6 stopped! +[2024-09-22 16:11:08,445][02368] Stopping RolloutWorker_w2... +[2024-09-22 16:11:08,445][00338] Component RolloutWorker_w2 stopped! +[2024-09-22 16:11:08,450][02368] Loop rollout_proc2_evt_loop terminating... +[2024-09-22 16:11:08,468][00338] Component RolloutWorker_w0 stopped! +[2024-09-22 16:11:08,471][00338] Waiting for process learner_proc0 to stop... +[2024-09-22 16:11:08,468][02366] Stopping RolloutWorker_w0... +[2024-09-22 16:11:08,476][02366] Loop rollout_proc0_evt_loop terminating... +[2024-09-22 16:11:10,013][00338] Waiting for process inference_proc0-0 to join... +[2024-09-22 16:11:10,276][00338] Waiting for process rollout_proc0 to join... +[2024-09-22 16:11:12,827][00338] Waiting for process rollout_proc1 to join... +[2024-09-22 16:11:12,831][00338] Waiting for process rollout_proc2 to join... +[2024-09-22 16:11:12,834][00338] Waiting for process rollout_proc3 to join... +[2024-09-22 16:11:12,839][00338] Waiting for process rollout_proc4 to join... +[2024-09-22 16:11:12,843][00338] Waiting for process rollout_proc5 to join... +[2024-09-22 16:11:12,847][00338] Waiting for process rollout_proc6 to join... +[2024-09-22 16:11:12,851][00338] Waiting for process rollout_proc7 to join... +[2024-09-22 16:11:12,855][00338] Batcher 0 profile tree view: +batching: 66.6346, releasing_batches: 0.0745 +[2024-09-22 16:11:12,857][00338] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0000 + wait_policy_total: 1016.4153 +update_model: 22.9892 + weight_update: 0.0029 +one_step: 0.0116 + handle_policy_step: 1505.1943 + deserialize: 36.7217, stack: 7.8323, obs_to_device_normalize: 306.3891, forward: 803.5875, send_messages: 72.8581 + prepare_outputs: 204.0244 + to_cpu: 117.1595 +[2024-09-22 16:11:12,859][00338] Learner 0 profile tree view: +misc: 0.0127, prepare_batch: 29.6141 +train: 178.8906 + epoch_init: 0.0190, minibatch_init: 0.0305, losses_postprocess: 1.6527, kl_divergence: 1.5693, after_optimizer: 85.3035 + calculate_losses: 61.8876 + losses_init: 0.0208, forward_head: 2.7074, bptt_initial: 41.4459, tail: 2.5972, advantages_returns: 0.7053, losses: 8.8913 + bptt: 4.7191 + bptt_forward_core: 4.4880 + update: 26.7608 + clip: 2.0785 +[2024-09-22 16:11:12,861][00338] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.8633, enqueue_policy_requests: 245.6178, env_step: 2076.8457, overhead: 34.0852, complete_rollouts: 17.8207 +save_policy_outputs: 53.1403 + split_output_tensors: 21.0932 +[2024-09-22 16:11:12,862][00338] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.8021, enqueue_policy_requests: 249.0436, env_step: 2074.5814, overhead: 34.2225, complete_rollouts: 17.7478 +save_policy_outputs: 52.5804 + split_output_tensors: 20.9781 +[2024-09-22 16:11:12,864][00338] Loop Runner_EvtLoop terminating... +[2024-09-22 16:11:12,865][00338] Runner profile tree view: +main_loop: 2677.2555 +[2024-09-22 16:11:12,866][00338] Collected {0: 10006528}, FPS: 3737.6 +[2024-09-22 16:11:21,064][00338] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-09-22 16:11:21,066][00338] Overriding arg 'num_workers' with value 1 passed from command line +[2024-09-22 16:11:21,069][00338] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-09-22 16:11:21,071][00338] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-09-22 16:11:21,073][00338] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-09-22 16:11:21,075][00338] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-09-22 16:11:21,076][00338] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2024-09-22 16:11:21,078][00338] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-09-22 16:11:21,080][00338] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2024-09-22 16:11:21,081][00338] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2024-09-22 16:11:21,082][00338] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-09-22 16:11:21,083][00338] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-09-22 16:11:21,085][00338] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-09-22 16:11:21,086][00338] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-09-22 16:11:21,087][00338] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-09-22 16:11:21,121][00338] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 16:11:21,124][00338] RunningMeanStd input shape: (3, 72, 128) +[2024-09-22 16:11:21,127][00338] RunningMeanStd input shape: (1,) +[2024-09-22 16:11:21,143][00338] ConvEncoder: input_channels=3 +[2024-09-22 16:11:21,254][00338] Conv encoder output size: 512 +[2024-09-22 16:11:21,256][00338] Policy head output size: 512 +[2024-09-22 16:11:21,447][00338] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth... +[2024-09-22 16:11:22,599][00338] Num frames 100... +[2024-09-22 16:11:22,770][00338] Num frames 200... +[2024-09-22 16:11:22,952][00338] Num frames 300... +[2024-09-22 16:11:23,124][00338] Num frames 400... +[2024-09-22 16:11:23,298][00338] Num frames 500... +[2024-09-22 16:11:23,485][00338] Num frames 600... +[2024-09-22 16:11:23,678][00338] Num frames 700... +[2024-09-22 16:11:23,855][00338] Num frames 800... +[2024-09-22 16:11:24,029][00338] Num frames 900... +[2024-09-22 16:11:24,156][00338] Num frames 1000... +[2024-09-22 16:11:24,284][00338] Num frames 1100... +[2024-09-22 16:11:24,409][00338] Num frames 1200... +[2024-09-22 16:11:24,543][00338] Num frames 1300... +[2024-09-22 16:11:24,694][00338] Avg episode rewards: #0: 35.760, true rewards: #0: 13.760 +[2024-09-22 16:11:24,696][00338] Avg episode reward: 35.760, avg true_objective: 13.760 +[2024-09-22 16:11:24,731][00338] Num frames 1400... +[2024-09-22 16:11:24,852][00338] Num frames 1500... +[2024-09-22 16:11:24,976][00338] Num frames 1600... +[2024-09-22 16:11:25,097][00338] Num frames 1700... +[2024-09-22 16:11:25,220][00338] Num frames 1800... +[2024-09-22 16:11:25,345][00338] Num frames 1900... +[2024-09-22 16:11:25,468][00338] Num frames 2000... +[2024-09-22 16:11:25,610][00338] Num frames 2100... +[2024-09-22 16:11:25,733][00338] Num frames 2200... +[2024-09-22 16:11:25,852][00338] Num frames 2300... +[2024-09-22 16:11:25,976][00338] Num frames 2400... +[2024-09-22 16:11:26,102][00338] Num frames 2500... +[2024-09-22 16:11:26,230][00338] Num frames 2600... +[2024-09-22 16:11:26,356][00338] Num frames 2700... +[2024-09-22 16:11:26,483][00338] Num frames 2800... +[2024-09-22 16:11:26,634][00338] Num frames 2900... +[2024-09-22 16:11:26,761][00338] Num frames 3000... +[2024-09-22 16:11:26,886][00338] Num frames 3100... +[2024-09-22 16:11:27,009][00338] Num frames 3200... +[2024-09-22 16:11:27,135][00338] Num frames 3300... +[2024-09-22 16:11:27,266][00338] Num frames 3400... +[2024-09-22 16:11:27,418][00338] Avg episode rewards: #0: 47.879, true rewards: #0: 17.380 +[2024-09-22 16:11:27,419][00338] Avg episode reward: 47.879, avg true_objective: 17.380 +[2024-09-22 16:11:27,453][00338] Num frames 3500... +[2024-09-22 16:11:27,591][00338] Num frames 3600... +[2024-09-22 16:11:27,713][00338] Num frames 3700... +[2024-09-22 16:11:27,831][00338] Num frames 3800... +[2024-09-22 16:11:27,955][00338] Num frames 3900... +[2024-09-22 16:11:28,039][00338] Avg episode rewards: #0: 34.413, true rewards: #0: 13.080 +[2024-09-22 16:11:28,041][00338] Avg episode reward: 34.413, avg true_objective: 13.080 +[2024-09-22 16:11:28,132][00338] Num frames 4000... +[2024-09-22 16:11:28,258][00338] Num frames 4100... +[2024-09-22 16:11:28,379][00338] Num frames 4200... +[2024-09-22 16:11:28,510][00338] Num frames 4300... +[2024-09-22 16:11:28,652][00338] Num frames 4400... +[2024-09-22 16:11:28,776][00338] Num frames 4500... +[2024-09-22 16:11:28,901][00338] Num frames 4600... +[2024-09-22 16:11:29,024][00338] Num frames 4700... +[2024-09-22 16:11:29,109][00338] Avg episode rewards: #0: 30.060, true rewards: #0: 11.810 +[2024-09-22 16:11:29,111][00338] Avg episode reward: 30.060, avg true_objective: 11.810 +[2024-09-22 16:11:29,208][00338] Num frames 4800... +[2024-09-22 16:11:29,332][00338] Num frames 4900... +[2024-09-22 16:11:29,456][00338] Num frames 5000... +[2024-09-22 16:11:29,589][00338] Num frames 5100... +[2024-09-22 16:11:29,721][00338] Num frames 5200... +[2024-09-22 16:11:29,846][00338] Num frames 5300... +[2024-09-22 16:11:29,966][00338] Num frames 5400... +[2024-09-22 16:11:30,066][00338] Avg episode rewards: #0: 26.672, true rewards: #0: 10.872 +[2024-09-22 16:11:30,067][00338] Avg episode reward: 26.672, avg true_objective: 10.872 +[2024-09-22 16:11:30,150][00338] Num frames 5500... +[2024-09-22 16:11:30,273][00338] Num frames 5600... +[2024-09-22 16:11:30,397][00338] Num frames 5700... +[2024-09-22 16:11:30,524][00338] Num frames 5800... +[2024-09-22 16:11:30,648][00338] Num frames 5900... +[2024-09-22 16:11:30,784][00338] Num frames 6000... +[2024-09-22 16:11:30,906][00338] Num frames 6100... +[2024-09-22 16:11:31,032][00338] Num frames 6200... +[2024-09-22 16:11:31,155][00338] Num frames 6300... +[2024-09-22 16:11:31,282][00338] Num frames 6400... +[2024-09-22 16:11:31,403][00338] Num frames 6500... +[2024-09-22 16:11:31,573][00338] Avg episode rewards: #0: 27.147, true rewards: #0: 10.980 +[2024-09-22 16:11:31,576][00338] Avg episode reward: 27.147, avg true_objective: 10.980 +[2024-09-22 16:11:31,594][00338] Num frames 6600... +[2024-09-22 16:11:31,734][00338] Num frames 6700... +[2024-09-22 16:11:31,853][00338] Num frames 6800... +[2024-09-22 16:11:31,973][00338] Num frames 6900... +[2024-09-22 16:11:32,095][00338] Num frames 7000... +[2024-09-22 16:11:32,214][00338] Num frames 7100... +[2024-09-22 16:11:32,339][00338] Num frames 7200... +[2024-09-22 16:11:32,460][00338] Num frames 7300... +[2024-09-22 16:11:32,596][00338] Num frames 7400... +[2024-09-22 16:11:32,724][00338] Num frames 7500... +[2024-09-22 16:11:32,851][00338] Num frames 7600... +[2024-09-22 16:11:32,976][00338] Num frames 7700... +[2024-09-22 16:11:33,119][00338] Avg episode rewards: #0: 27.531, true rewards: #0: 11.103 +[2024-09-22 16:11:33,121][00338] Avg episode reward: 27.531, avg true_objective: 11.103 +[2024-09-22 16:11:33,159][00338] Num frames 7800... +[2024-09-22 16:11:33,285][00338] Num frames 7900... +[2024-09-22 16:11:33,407][00338] Num frames 8000... +[2024-09-22 16:11:33,539][00338] Num frames 8100... +[2024-09-22 16:11:33,661][00338] Num frames 8200... +[2024-09-22 16:11:33,791][00338] Num frames 8300... +[2024-09-22 16:11:33,917][00338] Num frames 8400... +[2024-09-22 16:11:34,056][00338] Num frames 8500... +[2024-09-22 16:11:34,229][00338] Num frames 8600... +[2024-09-22 16:11:34,395][00338] Num frames 8700... +[2024-09-22 16:11:34,578][00338] Num frames 8800... +[2024-09-22 16:11:34,752][00338] Num frames 8900... +[2024-09-22 16:11:34,929][00338] Num frames 9000... +[2024-09-22 16:11:35,096][00338] Num frames 9100... +[2024-09-22 16:11:35,266][00338] Num frames 9200... +[2024-09-22 16:11:35,444][00338] Num frames 9300... +[2024-09-22 16:11:35,617][00338] Num frames 9400... +[2024-09-22 16:11:35,798][00338] Num frames 9500... +[2024-09-22 16:11:35,983][00338] Num frames 9600... +[2024-09-22 16:11:36,161][00338] Num frames 9700... +[2024-09-22 16:11:36,350][00338] Num frames 9800... +[2024-09-22 16:11:36,527][00338] Avg episode rewards: #0: 31.590, true rewards: #0: 12.340 +[2024-09-22 16:11:36,529][00338] Avg episode reward: 31.590, avg true_objective: 12.340 +[2024-09-22 16:11:36,571][00338] Num frames 9900... +[2024-09-22 16:11:36,693][00338] Num frames 10000... +[2024-09-22 16:11:36,813][00338] Num frames 10100... +[2024-09-22 16:11:36,941][00338] Num frames 10200... +[2024-09-22 16:11:37,060][00338] Num frames 10300... +[2024-09-22 16:11:37,179][00338] Num frames 10400... +[2024-09-22 16:11:37,301][00338] Num frames 10500... +[2024-09-22 16:11:37,424][00338] Num frames 10600... +[2024-09-22 16:11:37,532][00338] Avg episode rewards: #0: 29.489, true rewards: #0: 11.822 +[2024-09-22 16:11:37,533][00338] Avg episode reward: 29.489, avg true_objective: 11.822 +[2024-09-22 16:11:37,608][00338] Num frames 10700... +[2024-09-22 16:11:37,728][00338] Num frames 10800... +[2024-09-22 16:11:37,851][00338] Num frames 10900... +[2024-09-22 16:11:37,980][00338] Num frames 11000... +[2024-09-22 16:11:38,100][00338] Num frames 11100... +[2024-09-22 16:11:38,223][00338] Num frames 11200... +[2024-09-22 16:11:38,344][00338] Num frames 11300... +[2024-09-22 16:11:38,467][00338] Num frames 11400... +[2024-09-22 16:11:38,600][00338] Num frames 11500... +[2024-09-22 16:11:38,727][00338] Num frames 11600... +[2024-09-22 16:11:38,851][00338] Num frames 11700... +[2024-09-22 16:11:38,982][00338] Num frames 11800... +[2024-09-22 16:11:39,104][00338] Num frames 11900... +[2024-09-22 16:11:39,230][00338] Num frames 12000... +[2024-09-22 16:11:39,359][00338] Num frames 12100... +[2024-09-22 16:11:39,483][00338] Num frames 12200... +[2024-09-22 16:11:39,611][00338] Num frames 12300... +[2024-09-22 16:11:39,739][00338] Num frames 12400... +[2024-09-22 16:11:39,864][00338] Num frames 12500... +[2024-09-22 16:11:39,993][00338] Num frames 12600... +[2024-09-22 16:11:40,122][00338] Avg episode rewards: #0: 31.858, true rewards: #0: 12.658 +[2024-09-22 16:11:40,124][00338] Avg episode reward: 31.858, avg true_objective: 12.658 +[2024-09-22 16:12:57,362][00338] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2024-09-22 16:14:37,852][00338] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-09-22 16:14:37,854][00338] Overriding arg 'num_workers' with value 1 passed from command line +[2024-09-22 16:14:37,856][00338] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-09-22 16:14:37,858][00338] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-09-22 16:14:37,860][00338] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-09-22 16:14:37,862][00338] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-09-22 16:14:37,864][00338] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2024-09-22 16:14:37,865][00338] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-09-22 16:14:37,866][00338] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2024-09-22 16:14:37,867][00338] Adding new argument 'hf_repository'='kalmi901/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2024-09-22 16:14:37,868][00338] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-09-22 16:14:37,869][00338] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-09-22 16:14:37,870][00338] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-09-22 16:14:37,871][00338] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-09-22 16:14:37,872][00338] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-09-22 16:14:37,900][00338] RunningMeanStd input shape: (3, 72, 128) +[2024-09-22 16:14:37,902][00338] RunningMeanStd input shape: (1,) +[2024-09-22 16:14:37,914][00338] ConvEncoder: input_channels=3 +[2024-09-22 16:14:37,952][00338] Conv encoder output size: 512 +[2024-09-22 16:14:37,954][00338] Policy head output size: 512 +[2024-09-22 16:14:37,973][00338] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth... +[2024-09-22 16:14:38,403][00338] Num frames 100... +[2024-09-22 16:14:38,537][00338] Num frames 200... +[2024-09-22 16:14:38,681][00338] Num frames 300... +[2024-09-22 16:14:38,803][00338] Num frames 400... +[2024-09-22 16:14:38,924][00338] Num frames 500... +[2024-09-22 16:14:39,049][00338] Num frames 600... +[2024-09-22 16:14:39,173][00338] Num frames 700... +[2024-09-22 16:14:39,294][00338] Num frames 800... +[2024-09-22 16:14:39,418][00338] Num frames 900... +[2024-09-22 16:14:39,546][00338] Num frames 1000... +[2024-09-22 16:14:39,677][00338] Num frames 1100... +[2024-09-22 16:14:39,801][00338] Num frames 1200... +[2024-09-22 16:14:39,952][00338] Avg episode rewards: #0: 30.800, true rewards: #0: 12.800 +[2024-09-22 16:14:39,954][00338] Avg episode reward: 30.800, avg true_objective: 12.800 +[2024-09-22 16:14:39,983][00338] Num frames 1300... +[2024-09-22 16:14:40,114][00338] Num frames 1400... +[2024-09-22 16:14:40,240][00338] Num frames 1500... +[2024-09-22 16:14:40,365][00338] Num frames 1600... +[2024-09-22 16:14:40,486][00338] Num frames 1700... +[2024-09-22 16:14:40,620][00338] Num frames 1800... +[2024-09-22 16:14:40,761][00338] Num frames 1900... +[2024-09-22 16:14:40,884][00338] Num frames 2000... +[2024-09-22 16:14:41,010][00338] Num frames 2100... +[2024-09-22 16:14:41,135][00338] Num frames 2200... +[2024-09-22 16:14:41,260][00338] Num frames 2300... +[2024-09-22 16:14:41,382][00338] Num frames 2400... +[2024-09-22 16:14:41,490][00338] Avg episode rewards: #0: 31.715, true rewards: #0: 12.215 +[2024-09-22 16:14:41,493][00338] Avg episode reward: 31.715, avg true_objective: 12.215 +[2024-09-22 16:14:41,575][00338] Num frames 2500... +[2024-09-22 16:14:41,706][00338] Num frames 2600... +[2024-09-22 16:14:41,829][00338] Num frames 2700... +[2024-09-22 16:14:41,952][00338] Num frames 2800... +[2024-09-22 16:14:42,077][00338] Num frames 2900... +[2024-09-22 16:14:42,203][00338] Num frames 3000... +[2024-09-22 16:14:42,360][00338] Avg episode rewards: #0: 25.613, true rewards: #0: 10.280 +[2024-09-22 16:14:42,361][00338] Avg episode reward: 25.613, avg true_objective: 10.280 +[2024-09-22 16:14:42,385][00338] Num frames 3100... +[2024-09-22 16:14:42,514][00338] Num frames 3200... +[2024-09-22 16:14:42,637][00338] Num frames 3300... +[2024-09-22 16:14:42,771][00338] Num frames 3400... +[2024-09-22 16:14:42,894][00338] Num frames 3500... +[2024-09-22 16:14:43,021][00338] Num frames 3600... +[2024-09-22 16:14:43,145][00338] Num frames 3700... +[2024-09-22 16:14:43,269][00338] Num frames 3800... +[2024-09-22 16:14:43,392][00338] Num frames 3900... +[2024-09-22 16:14:43,521][00338] Num frames 4000... +[2024-09-22 16:14:43,647][00338] Num frames 4100... +[2024-09-22 16:14:43,779][00338] Num frames 4200... +[2024-09-22 16:14:43,902][00338] Num frames 4300... +[2024-09-22 16:14:44,025][00338] Num frames 4400... +[2024-09-22 16:14:44,115][00338] Avg episode rewards: #0: 27.070, true rewards: #0: 11.070 +[2024-09-22 16:14:44,117][00338] Avg episode reward: 27.070, avg true_objective: 11.070 +[2024-09-22 16:14:44,211][00338] Num frames 4500... +[2024-09-22 16:14:44,336][00338] Num frames 4600... +[2024-09-22 16:14:44,462][00338] Num frames 4700... +[2024-09-22 16:14:44,595][00338] Num frames 4800... +[2024-09-22 16:14:44,717][00338] Num frames 4900... +[2024-09-22 16:14:44,849][00338] Num frames 5000... +[2024-09-22 16:14:44,974][00338] Num frames 5100... +[2024-09-22 16:14:45,079][00338] Avg episode rewards: #0: 24.680, true rewards: #0: 10.280 +[2024-09-22 16:14:45,081][00338] Avg episode reward: 24.680, avg true_objective: 10.280 +[2024-09-22 16:14:45,159][00338] Num frames 5200... +[2024-09-22 16:14:45,281][00338] Num frames 5300... +[2024-09-22 16:14:45,402][00338] Num frames 5400... +[2024-09-22 16:14:45,493][00338] Avg episode rewards: #0: 21.213, true rewards: #0: 9.047 +[2024-09-22 16:14:45,494][00338] Avg episode reward: 21.213, avg true_objective: 9.047 +[2024-09-22 16:14:45,590][00338] Num frames 5500... +[2024-09-22 16:14:45,716][00338] Num frames 5600... +[2024-09-22 16:14:45,848][00338] Num frames 5700... +[2024-09-22 16:14:45,966][00338] Num frames 5800... +[2024-09-22 16:14:46,084][00338] Num frames 5900... +[2024-09-22 16:14:46,206][00338] Num frames 6000... +[2024-09-22 16:14:46,323][00338] Num frames 6100... +[2024-09-22 16:14:46,450][00338] Num frames 6200... +[2024-09-22 16:14:46,580][00338] Num frames 6300... +[2024-09-22 16:14:46,701][00338] Num frames 6400... +[2024-09-22 16:14:46,850][00338] Num frames 6500... +[2024-09-22 16:14:46,989][00338] Num frames 6600... +[2024-09-22 16:14:47,060][00338] Avg episode rewards: #0: 22.017, true rewards: #0: 9.446 +[2024-09-22 16:14:47,064][00338] Avg episode reward: 22.017, avg true_objective: 9.446 +[2024-09-22 16:14:47,217][00338] Num frames 6700... +[2024-09-22 16:14:47,384][00338] Num frames 6800... +[2024-09-22 16:14:47,566][00338] Num frames 6900... +[2024-09-22 16:14:47,738][00338] Num frames 7000... +[2024-09-22 16:14:47,902][00338] Num frames 7100... +[2024-09-22 16:14:48,078][00338] Num frames 7200... +[2024-09-22 16:14:48,248][00338] Num frames 7300... +[2024-09-22 16:14:48,429][00338] Num frames 7400... +[2024-09-22 16:14:48,609][00338] Num frames 7500... +[2024-09-22 16:14:48,781][00338] Num frames 7600... +[2024-09-22 16:14:48,955][00338] Num frames 7700... +[2024-09-22 16:14:49,137][00338] Num frames 7800... +[2024-09-22 16:14:49,314][00338] Num frames 7900... +[2024-09-22 16:14:49,492][00338] Num frames 8000... +[2024-09-22 16:14:49,656][00338] Num frames 8100... +[2024-09-22 16:14:49,779][00338] Num frames 8200... +[2024-09-22 16:14:49,899][00338] Num frames 8300... +[2024-09-22 16:14:50,018][00338] Num frames 8400... +[2024-09-22 16:14:50,142][00338] Num frames 8500... +[2024-09-22 16:14:50,276][00338] Num frames 8600... +[2024-09-22 16:14:50,401][00338] Num frames 8700... +[2024-09-22 16:14:50,472][00338] Avg episode rewards: #0: 26.515, true rewards: #0: 10.890 +[2024-09-22 16:14:50,474][00338] Avg episode reward: 26.515, avg true_objective: 10.890 +[2024-09-22 16:14:50,594][00338] Num frames 8800... +[2024-09-22 16:14:50,716][00338] Num frames 8900... +[2024-09-22 16:14:50,842][00338] Num frames 9000... +[2024-09-22 16:14:50,964][00338] Num frames 9100... +[2024-09-22 16:14:51,090][00338] Num frames 9200... +[2024-09-22 16:14:51,217][00338] Num frames 9300... +[2024-09-22 16:14:51,350][00338] Num frames 9400... +[2024-09-22 16:14:51,474][00338] Num frames 9500... +[2024-09-22 16:14:51,606][00338] Num frames 9600... +[2024-09-22 16:14:51,708][00338] Avg episode rewards: #0: 25.487, true rewards: #0: 10.709 +[2024-09-22 16:14:51,711][00338] Avg episode reward: 25.487, avg true_objective: 10.709 +[2024-09-22 16:14:51,789][00338] Num frames 9700... +[2024-09-22 16:14:51,915][00338] Num frames 9800... +[2024-09-22 16:14:52,039][00338] Num frames 9900... +[2024-09-22 16:14:52,164][00338] Num frames 10000... +[2024-09-22 16:14:52,295][00338] Num frames 10100... +[2024-09-22 16:14:52,419][00338] Num frames 10200... +[2024-09-22 16:14:52,552][00338] Num frames 10300... +[2024-09-22 16:14:52,676][00338] Num frames 10400... +[2024-09-22 16:14:52,798][00338] Num frames 10500... +[2024-09-22 16:14:52,919][00338] Num frames 10600... +[2024-09-22 16:14:53,038][00338] Num frames 10700... +[2024-09-22 16:14:53,164][00338] Num frames 10800... +[2024-09-22 16:14:53,288][00338] Num frames 10900... +[2024-09-22 16:14:53,377][00338] Avg episode rewards: #0: 25.923, true rewards: #0: 10.923 +[2024-09-22 16:14:53,379][00338] Avg episode reward: 25.923, avg true_objective: 10.923 +[2024-09-22 16:15:59,559][00338] Replay video saved to /content/train_dir/default_experiment/replay.mp4!